Merge pull request 'feat(health): detect no-Xid GPU freezes (open-module VA-space faults)' (#46) from feat/gpu-vaspace-spt into main
release / test (push) Successful in 13s
release / release (push) Successful in 17s

Reviewed-on: #46
This commit was merged in pull request #46.
This commit is contained in:
2026-05-29 14:10:58 +00:00
16 changed files with 493 additions and 12 deletions
+20
View File
@@ -5,6 +5,26 @@ All notable changes to RigDoctor are recorded here. Format follows
(`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git
release tag (so the auto-updater, D18, can compare versions).
## [0.42.0] - 2026-05-29
### Added
- **Detect hard freezes that log no Xid.** The kernel-log scanner caught Xid codes, OOM, panic,
MCE, PCIe AER, thermal events, and amdgpu resets — but a crash that logs *no* Xid slipped
through. It now flags the NVIDIA open-kernel-module **VA-space mapping fault** (`gpu_vaspace.c`
/ `dmaAllocMapping` assertions, NVKMS GEM-allocation failures) — a driver-internal error that
can storm for minutes and end in a freeze without the GPU ever "falling off the bus" (distinct
from Xid 79). A new `check_nvidia_module()` notes when the open module (`nvidia-*-open`) is
loaded — the context behind these faults — and a new `ai_knowledge` entry lets the assistant
tell the no-Xid freeze apart from the Xid 79 hardware drop.
- **Add games no launcher reports (e.g. SPT).** A user-authored custom-games list
(`core/customgames.py`) shows alongside Steam/Lutris/Heroic in `rigdoctor games` and the GUI
("Add game…"), for standalone mod launchers (Single-Player Tarkov), itch.io downloads, or any
hand-installed game. Each entry can carry a launch command and a log directory:
`rigdoctor games add "SPT" --command .../tarkov.sh` (a sibling `logs/` is auto-detected),
`rigdoctor games play "SPT"` launches it under the crash-capture wrapper (tagged with the real
name, not the script's), and the diagnostic now tails the game's *own* logs — SPT's
server/launcher logs — alongside the kernel log so the analysis sees what the game logged
before the freeze.
## [0.41.0] - 2026-05-25
### Added
- **Import a crash dump (`.dmp`) and explain it with AI.** The **Games** page gains an
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "rigdoctor"
version = "0.41.0"
version = "0.42.0"
description = "Modular hardware monitoring & crash diagnostics for Linux gamers."
readme = "README.md"
requires-python = ">=3.11"
+1 -1
View File
@@ -1,3 +1,3 @@
"""RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers."""
__version__ = "0.41.0"
__version__ = "0.42.0"
+60 -2
View File
@@ -525,13 +525,13 @@ def cmd_gameenv(args) -> int:
def cmd_games(args) -> int:
from dataclasses import asdict
from .core import launchers, steam
from .core import customgames, launchers, steam
selected = steam.selected_library_paths()
result = steam.rescan() if selected else None
steam_games = result.games if result else []
extra = launchers.scan() # non-Steam (Lutris/Heroic)
all_games = list(steam_games) + list(extra)
all_games = list(steam_games) + list(extra) + customgames.scan() # + user-added (SPT etc.)
if args.json:
print(json.dumps({
@@ -596,6 +596,50 @@ def cmd_games_libraries(args) -> int:
return 0
def cmd_games_add(args) -> int:
    """Handle `rigdoctor games add`: store a user-added game and echo what was saved.

    Returns 0 when `customgames.add` accepted the entry, 1 when it refused it
    (blank name or a name that is already stored).
    """
    from .core import customgames

    accepted = customgames.add(args.name, command=args.command, logdir=args.logdir)
    if not accepted:
        print(f"'{args.name}' is blank or already in your custom games.")
        return 1

    print(f"Added '{args.name}' to your games (custom). It'll show in `rigdoctor games` "
          "and the diagnostic game picker.")
    # Read the entry back so the summary shows what was actually stored,
    # including a log directory that `add` may have inferred on its own.
    stored = customgames.get(args.name) or {}
    launch = stored.get("command")
    if launch:
        print(f" launch: {launch} (run with: rigdoctor games play \"{args.name}\")")
    logs = stored.get("logdir")
    if logs:
        print(f" logs: {logs} (included in crash diagnostics)")
    return 0
def cmd_games_play(args) -> int:
    """Handle `rigdoctor games play`: launch a custom game under the capture wrapper.

    Returns whatever `wrap.run` returns when the game has a launch command; otherwise
    prints how to fix the entry and returns 1.
    """
    from .core import customgames, wrap

    argv = customgames.command(args.name)
    if argv is not None:
        print(f"Launching '{args.name}' with crash-capture… (capture stops cleanly on exit; "
              "a hard freeze is flagged next time you open RigDoctor)")
        # Pass the stored name so the capture is tagged with it rather than the script name.
        return wrap.run(argv, game=args.name)

    # No argv: tell apart "unknown game" from "known game without a command".
    if customgames.get(args.name) is None:
        print(f"'{args.name}' isn't in your custom games. Add it: "
              f"rigdoctor games add \"{args.name}\" --command <launch script>")
    else:
        print(f"'{args.name}' has no launch command. Set one: "
              f"rigdoctor games remove \"{args.name}\" && rigdoctor games add \"{args.name}\" "
              "--command <launch script>")
    return 1
def cmd_games_remove(args) -> int:
    """Handle `rigdoctor games remove`: drop a custom game by name.

    Returns 0 if an entry was removed, 1 (after listing the current names) if not.
    """
    from .core import customgames

    if not customgames.remove(args.name):
        current = ', '.join(customgames.names()) or '(none)'
        print(f"'{args.name}' isn't in your custom games. Current: {current}")
        return 1
    print(f"Removed '{args.name}' from your custom games.")
    return 0
def build_parser() -> argparse.ArgumentParser:
p = argparse.ArgumentParser(
prog="rigdoctor",
@@ -681,6 +725,20 @@ def build_parser() -> argparse.ArgumentParser:
lib_p.add_argument("--json", action="store_true", help="output JSON")
lib_p.set_defaults(func=cmd_games_libraries)
add_p = games_sub.add_parser("add", help="add a game no launcher reports (e.g. SPT)")
add_p.add_argument("name", help="game name, e.g. \"SPT\"")
add_p.add_argument("--command", default=None,
help="launch command/script (e.g. the path to tarkov.sh) — enables `games play`")
add_p.add_argument("--logdir", default=None,
help="the game's own log directory (auto-detected as <command dir>/logs if present)")
add_p.set_defaults(func=cmd_games_add)
play_p = games_sub.add_parser("play", help="launch a custom game with crash-capture (e.g. SPT)")
play_p.add_argument("name", help="game name to launch")
play_p.set_defaults(func=cmd_games_play)
rm_p = games_sub.add_parser("remove", help="remove a previously added custom game")
rm_p.add_argument("name", help="game name to remove")
rm_p.set_defaults(func=cmd_games_remove)
env_p = sub.add_parser("gameenv", help="gaming environment checks (M6): flag stability/perf settings")
env_p.add_argument("--json", action="store_true", help="output JSON instead of text")
env_p.set_defaults(func=cmd_gameenv)
+3
View File
@@ -36,6 +36,9 @@ SPAWN_LOG = STATE_DIR / "recorder.out"
# Gaming environment / game detection (M6) — cached Steam game scan (mutable state,
# not config: refreshed by the background scan on every launch).
GAMES_FILE = STATE_DIR / "games.json"
# User-added games that no launcher reports (e.g. SPT/standalone mod launchers). Authored
# by the user (not a refreshable cache), so it lives in DATA_DIR and persists across scans.
CUSTOM_GAMES_FILE = DATA_DIR / "custom-games.json"
# Logging & reports (opt-in via `logging_enabled`). App log: rotating file of app events.
# Each diagnostic is stored under DIAGNOSTICS_DIR/<id>/; "Report" zips one into REPORTS_DIR.
+8
View File
@@ -30,6 +30,14 @@ ENTRIES: list[tuple[tuple[str, ...], str]] = [
(("xid 8", "xid 62", "xid 63", "xid 64"),
"These Xid codes commonly indicate VRAM/ECC or memory-training problems — suspect failing "
"VRAM or an unstable memory overclock."),
(("va-space mapping", "gpu_vaspace", "dmaallocmapping", "nvkms memory for gem",
"open kernel module", "nvidia open"),
"NVIDIA open-kernel-module VA-space mapping errors (gpu_vaspace.c / dmaAllocMapping / "
"'Failed to allocate NVKMS memory for GEM object') are a driver-internal fault on the open "
"module (nvidia-*-open). They can storm for minutes and end in a HARD FREEZE with NO Xid "
"logged — so the GPU never 'falls off the bus', and this is distinct from the Xid 79 "
"hardware drop. Fix path: switch from the open to the proprietary NVIDIA kernel module and "
"update to the latest driver branch."),
(("smart 197", "current_pending_sector", "pending sector"),
"SMART 197 (Current Pending Sector) > 0 = sectors the drive can't read and is waiting to "
"reallocate — early sign of a failing disk. Back up now and run an extended self-test."),
+113
View File
@@ -0,0 +1,113 @@
"""User-added games (M6): a manual list for titles no launcher reports.
Some games never show up in a Steam/Lutris/Heroic scan — standalone mod launchers like
**SPT** (Single-Player Tarkov), itch.io downloads, or any hand-installed executable. This
module keeps a small user-authored list so those still appear in the game list and can be
picked for a focused diagnostic, in the same `steam.Game` shape as every other source.
Each entry is a name plus two optionals: a **launch command** (so `rigdoctor games play`
can start it under the auto-capture wrapper) and a **log directory** (so a crash diagnostic
can read the game's own logs — e.g. SPT's `logs/tarkov-latest.log`). Stored as JSON in
`config.CUSTOM_GAMES_FILE`; stdlib only; every reader degrades to [] on a missing/bad file.
"""
from __future__ import annotations
import json
import os
import shlex
from .. import config
from .steam import Game
LAUNCHER = "custom"
def _load() -> list[dict]:
    """Read the stored entries, keeping only dicts that carry a non-empty ``name``.

    Returns [] when the file is missing, unreadable, not valid UTF-8/JSON, or not shaped
    like ``{"games": [...]}``.
    """
    try:
        # Explicit UTF-8: _save writes with ensure_ascii=False, so names may be non-ASCII
        # and must not depend on the locale's default encoding.
        data = json.loads(config.CUSTOM_GAMES_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):  # ValueError covers JSON and Unicode decode errors
        return []
    games = data.get("games") if isinstance(data, dict) else None
    if not isinstance(games, list):
        return []
    return [g for g in games if isinstance(g, dict) and g.get("name")]


def _save(games: list[dict]) -> None:
    """Persist ``games`` as ``{"games": [...]}`` JSON, creating the parent dir if needed.

    The payload is written to a sibling ``.tmp`` file and then moved over the real file,
    so an interrupted write cannot leave a truncated file (which `_load` would read as an
    empty list, silently dropping the user's entries).
    """
    path = config.CUSTOM_GAMES_FILE
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps({"games": games}, indent=2, ensure_ascii=False) + "\n"
    tmp = path.with_name(path.name + ".tmp")
    tmp.write_text(payload, encoding="utf-8")
    os.replace(tmp, path)
def names() -> list[str]:
    """Return the stored game names as strings, in the order `_load` yields them."""
    stored = _load()
    return [str(entry["name"]) for entry in stored]
def get(name: str) -> dict | None:
    """Look up a stored entry by name, ignoring case and surrounding whitespace.

    Returns the first matching entry dict, or None when nothing matches.
    """
    wanted = (name or "").strip().lower()
    for entry in _load():
        if str(entry["name"]).lower() == wanted:
            return entry
    return None
def add(name: str, command: str | None = None, logdir: str | None = None) -> bool:
    """Add a game by name, with an optional launch command and log directory.

    Args:
        name: Display name; surrounding whitespace is stripped.
        command: Optional launch command line, stored stripped but otherwise as given.
        logdir: Optional directory holding the game's own logs.

    Returns:
        False if the name is blank or already present (case-insensitive); True once the
        new entry has been saved.

    When a command is given but no logdir, a ``logs/`` directory next to the launch script
    is inferred if it exists (covers SPT's layout). Log directories are stored as absolute
    paths so later lookups do not depend on the working directory.
    """
    name = (name or "").strip()
    if not name:
        return False

    # Load once and reuse the list for both the duplicate check and the append.
    games = _load()
    wanted = name.lower()
    if any(str(g["name"]).lower() == wanted for g in games):
        return False

    entry: dict = {"name": name}
    command = (command or "").strip()
    logdir = (logdir or "").strip()
    if command:
        entry["command"] = command
        if not logdir:
            try:
                launcher = os.path.expanduser(_argv0(command))
            except ValueError:
                # Unbalanced quotes: shlex can't split it, so there is no script path
                # to infer a log directory from.
                launcher = ""
            script_dir = os.path.dirname(launcher)
            # Only infer next to an explicit path. A bare program name has no directory
            # component, and "logs" relative to the current directory would be a guess.
            if script_dir:
                sibling = os.path.join(script_dir, "logs")
                if os.path.isdir(sibling):
                    logdir = sibling
    if logdir:
        entry["logdir"] = os.path.abspath(os.path.expanduser(logdir))

    games.append(entry)
    _save(games)
    return True
def remove(name: str) -> bool:
    """Delete every stored entry whose name matches ``name`` (case-insensitive).

    Returns True if at least one entry was dropped and the file rewritten, else False.
    """
    target = (name or "").strip().lower()
    before = _load()
    after = [entry for entry in before if str(entry["name"]).lower() != target]
    if len(after) != len(before):
        _save(after)
        return True
    return False
def _argv0(command: str) -> str:
parts = shlex.split(command)
return parts[0] if parts else command
def command(name: str) -> list[str] | None:
    """The launch argv for a game (shlex-split), or None if it has no usable command.

    None is also returned when the stored command is not a string, is blank, or cannot be
    split (e.g. unbalanced quotes in a hand-edited file), so callers never see a raw
    ``ValueError``/``AttributeError`` from ``shlex``.
    """
    entry = get(name)
    cmd = (entry or {}).get("command")
    if not isinstance(cmd, str) or not cmd.strip():
        return None
    try:
        return shlex.split(cmd)
    except ValueError:
        return None
def log_dir(name: str) -> str | None:
    """Return the stored log directory for ``name`` if it is set and exists on disk.

    Returns None when the game is unknown, has no ``logdir``, or the path is not a directory.
    """
    stored = get(name) or {}
    candidate = stored.get("logdir")
    if candidate and os.path.isdir(candidate):
        return candidate
    return None
def scan() -> list[Game]:
    """Build a `Game` (launcher='custom') per stored entry, ordered by lowercased name.

    Only the name is filled in; appid, library and installdir are left as empty strings.
    """
    found = [
        Game(appid="", name=str(entry["name"]), library="", installdir="", launcher=LAUNCHER)
        for entry in _load()
    ]
    found.sort(key=lambda game: game.name.lower())
    return found
+1 -1
View File
@@ -75,7 +75,7 @@ def store(result, capture_path=None, since: float | None = None) -> Path | None:
_write(target / "report.txt", "\n".join(report))
try:
logs = gamelogs.collect(since=since)
logs = gamelogs.collect(since=since, game=getattr(result, "game", None))
if logs:
_write(target / "gamelogs.txt", logs)
except OSError:
+35 -2
View File
@@ -81,15 +81,48 @@ def available() -> bool:
return bool(_proton_logs() or _steam_console())
def collect(since: float | None = None, max_bytes: int = 8000) -> str:
"""Recent Proton + Steam log tails as one labelled text block ('' if none).
def _custom_game_logs(game: str, since: float | None, max_bytes: int) -> list[str]:
    """Labelled tails of the newest ``*.log`` files in a custom game's log directory.

    The directory comes from ``customgames.log_dir(game)``; with none registered (or on an
    OSError while listing it) the result is []. Files are ordered newest-first by ``_mtime``
    and only the first four are considered. With ``since`` set, a file whose ``_mtime`` is
    older than ``since`` is skipped. Custom games use their own timestamp formats, so
    freshness is judged per file rather than per line.
    """
    from . import customgames

    directory = customgames.log_dir(game)
    if not directory:
        return []

    try:
        candidates = [path for path in Path(directory).glob("*.log") if path.is_file()]
    except OSError:
        return []

    newest_first = sorted(candidates, key=_mtime, reverse=True)
    sections: list[str] = []
    # A session touches only a handful of logs (tarkov/server/launcher latest), so cap at 4.
    for path in newest_first[:4]:
        stale = since is not None and _mtime(path) < since
        if stale:
            continue
        body = _tail(path, max_bytes).strip()
        if not body:
            continue
        sections.append(f"--- {game} log ({path.name}) ---\n{body}")
    return sections
def collect(since: float | None = None, max_bytes: int = 8000, game: str | None = None) -> str:
"""Recent Proton + Steam (+ custom-game) log tails as one labelled text block ('' if none).
With ``since`` (epoch), scope to that session: skip a Proton log not written during/after
the session (a stale per-app log from an earlier game), and keep only Steam-console lines
timestamped at/after ``since`` so we don't feed the model an unrelated past session.
``game`` (the diagnostic's focused title) pulls in that custom game's own logs if it has a
registered log dir — e.g. SPT's server/launcher logs, which Steam/Proton never see.
"""
sections: list[str] = []
if game:
sections += _custom_game_logs(game, since, max_bytes)
protons = _proton_logs()
if protons:
log = protons[0]
+73
View File
@@ -116,6 +116,31 @@ def scan_journal_text(text: str) -> list[Finding]:
"Check power/thermals/driver; capture a session with `rigdoctor record`.",
))
# NVIDIA open-kernel-module VA-space mapping faults: a driver-internal failure that can
# storm for minutes and end in a HARD FREEZE with NO Xid logged — the GPU never "falls off
# the bus", so the Xid scan above misses it entirely. These code paths live in the open
# kernel module (nvidia-*-open); the proprietary module doesn't hit them.
nvrm_va = [
ln for ln in lines
if "gpu_vaspace.c" in ln
or "_gvaspaceMappingInsert" in ln
or "dmaAllocMapping" in ln
or "NVKMS memory for GEM object" in ln
]
if nvrm_va:
findings.append(Finding(
WARNING, "GPU", f"NVIDIA driver VA-space mapping errors ×{len(nvrm_va)}",
"The NVIDIA kernel module repeatedly failed to update the GPU's virtual address "
"space (gpu_vaspace / dmaAllocMapping assertions, NVKMS GEM-allocation failures). "
"This is a driver-internal fault that can recur for minutes and end in a hard freeze "
"with NO Xid logged — distinct from an Xid 79 hardware drop. These code paths are "
"specific to the open kernel module (nvidia-*-open).",
"If you're on the open module, switch to the proprietary NVIDIA driver "
"(install `nvidia-driver-###` instead of the `…-open` variant) and update to the "
"latest branch, then reboot. Capture a session with `rigdoctor record` to confirm "
"the errors precede the freeze.",
))
return findings
@@ -188,6 +213,53 @@ def check_nvidia_driver() -> list[Finding]:
return []
def _read_text(path: str) -> str | None:
try:
return Path(path).read_text()
except OSError:
return None
def _nvidia_module_is_open() -> bool | None:
    """Report whether the NVIDIA kernel module is the open-source flavor.

    Returns True for open (nvidia-*-open), False for proprietary, and None when neither
    source gives an answer.

    ``/proc/driver/nvidia/version`` is consulted first and needs no external tool. If its
    banner is missing or matches neither pattern, the ``filename:`` line of
    ``modinfo nvidia`` is used as a fallback (open when it contains ``-open``).
    """
    banner = _read_text("/proc/driver/nvidia/version")
    if banner:
        lowered = banner.lower()
        # Check the more specific phrase first: the open banner also contains "kernel module".
        if "open kernel module" in lowered:
            return True
        if "kernel module" in lowered:  # proprietary banner: "NVIDIA UNIX … Kernel Module …"
            return False

    if not shutil.which("modinfo"):
        return None

    try:
        result = subprocess.run(["modinfo", "nvidia"], capture_output=True, text=True, timeout=10)
        listing = result.stdout
    except (subprocess.SubprocessError, OSError):
        listing = ""

    filename_line = next(
        (row for row in listing.splitlines() if row.startswith("filename:")), None
    )
    if filename_line is None:
        return None
    return "-open" in filename_line
def check_nvidia_module() -> list[Finding]:
    """Emit one INFO finding when the open NVIDIA kernel module is detected.

    Returns [] unless `_nvidia_module_is_open()` is exactly True (so both "proprietary"
    and "can't tell" stay silent). The finding is advisory context for the no-Xid
    VA-space freeze signature; it only suggests trying the proprietary driver.
    """
    if _nvidia_module_is_open() is True:
        note = Finding(
            INFO, "Driver", "NVIDIA open kernel module in use",
            "The loaded NVIDIA driver is the open-source kernel module (nvidia-*-open). It's fine for "
            "most setups, but on some GeForce cards it hits driver-internal faults (VA-space mapping "
            "errors, hard freezes with no Xid) that the proprietary module doesn't.",
            "If you get unexplained hard freezes with no Xid in the logs, try the proprietary NVIDIA "
            "driver (`nvidia-driver-###` rather than the `…-open` variant) on the latest branch.",
        )
        return [note]
    return []
def _smart_devices() -> list[str]:
try:
proc = subprocess.run(["smartctl", "--scan"], capture_output=True, text=True, timeout=10)
@@ -336,6 +408,7 @@ def run_health_checks(include_journal: bool = True) -> list[Finding]:
findings: list[Finding] = []
findings += check_nvidia_driver()
findings += check_nvidia_module()
if include_journal:
findings += check_journal()
findings += check_journal_persistence()
+7 -3
View File
@@ -40,16 +40,20 @@ def launch_option() -> str:
return f"{quoted} wrap %command%"
def run(command: list[str]) -> int:
def run(command: list[str], game: str | None = None) -> int:
"""Start a focused capture (unless one's already running), run the game, then stop it.
Returns the game's exit code so Steam sees the right status."""
Returns the game's exit code so Steam sees the right status.
`game` overrides name detection — used by `games play` for a custom game (e.g. SPT), where
there's no SteamAppId and the bare script name (tarkov.sh) wouldn't tag the capture usefully.
"""
from . import diagnostic, reccontrol
if not command:
print("usage: rigdoctor wrap %command% (set as a Steam launch option)", file=sys.stderr)
return 2
game = game_name_from_env() or os.path.basename(command[0])
game = game or game_name_from_env() or os.path.basename(command[0])
started = False
if not reccontrol.running_pid(): # don't disturb an existing capture
started = diagnostic.start(game=game) is not None
+1 -1
View File
@@ -143,7 +143,7 @@ class DiagnosticDialog(QDialog):
lines.append("\nCapture summary:\n" + render_summary(summary))
since = (summary.start - 60) if summary.start else None
logs = gamelogs.collect(since=since) # scoped to this session
logs = gamelogs.collect(since=since, game=result.game) # scoped to this session
if logs:
lines.append("\nGame/Proton/Steam logs for this session:\n" + logs)
sys_logs = syslogs.collect(since=since) # kernel log + crashed-process records
+25 -1
View File
@@ -115,6 +115,10 @@ class GamesPage(QWidget):
self._autocap_btn = QPushButton("Auto-capture…")
self._autocap_btn.clicked.connect(self._show_autocapture)
header.addWidget(self._autocap_btn)
# Add a game no launcher reports (e.g. SPT / standalone mod launchers).
self._add_btn = QPushButton("Add game…")
self._add_btn.clicked.connect(self._add_custom_game)
header.addWidget(self._add_btn)
self._rescan_btn = QPushButton("Rescan")
self._rescan_btn.setObjectName("PrimaryButton")
self._rescan_btn.clicked.connect(self.refresh)
@@ -235,7 +239,9 @@ class GamesPage(QWidget):
]
self._libraries_ready.emit(libs)
try:
self._extra_games = launchers.scan() # Lutris / Heroic (non-Steam)
from ..core import customgames
# non-Steam: Lutris/Heroic + user-added games (SPT etc.)
self._extra_games = list(launchers.scan()) + customgames.scan()
except Exception:
self._extra_games = []
self._scanned.emit(steam.rescan())
@@ -423,6 +429,24 @@ class GamesPage(QWidget):
reccontrol.stop_background()
self._banner.hide()
def _add_custom_game(self) -> None:
    """Prompt for a game name and add it to the custom-games list.

    Does nothing if the dialog is cancelled or the name is blank. On success the page is
    refreshed so the new game shows up; if `customgames.add` refuses the name, an info box
    says it is already present.
    """
    from PySide6.QtWidgets import QInputDialog
    from ..core import customgames

    entered, accepted = QInputDialog.getText(
        self, "Add game", "Game name (e.g. SPT) — for titles no launcher reports:")
    title = entered.strip() if accepted else ""
    if not title:
        return

    if not customgames.add(title):
        QMessageBox.information(self, "Add game", f"'{title}' is already in your games.")
        return
    self.refresh()
def _show_autocapture(self) -> None:
from ..core import wrap
+85
View File
@@ -0,0 +1,85 @@
"""Tests for user-added games (M6): add/remove/scan of titles no launcher reports (e.g. SPT)."""
import tempfile
import unittest
from pathlib import Path
from unittest import mock
from rigdoctor.core import customgames
class CustomGamesTests(unittest.TestCase):
    """Exercise `customgames` against a throwaway JSON file in a per-test temp directory."""

    def setUp(self):
        # Point CUSTOM_GAMES_FILE at a temp path so no test touches the real user list.
        self._tmp = tempfile.TemporaryDirectory()
        self._file = Path(self._tmp.name) / "custom-games.json"
        self._patch = mock.patch.object(customgames.config, "CUSTOM_GAMES_FILE", self._file)
        self._patch.start()

    def tearDown(self):
        self._patch.stop()
        self._tmp.cleanup()

    def test_missing_file_scans_empty(self):
        # No file on disk yet: both readers must degrade to an empty list.
        self.assertEqual(customgames.scan(), [])
        self.assertEqual(customgames.names(), [])

    def test_add_then_scan_returns_game(self):
        added = customgames.add("SPT")
        self.assertTrue(added)
        found = customgames.scan()
        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertEqual(only.name, "SPT")
        self.assertEqual(only.launcher, "custom")
        self.assertTrue(self._file.exists())  # persisted

    def test_add_is_idempotent_case_insensitive(self):
        self.assertTrue(customgames.add("SPT"))
        self.assertFalse(customgames.add("spt"))  # already present
        self.assertFalse(customgames.add(" "))  # blank
        self.assertEqual(customgames.names(), ["SPT"])

    def test_remove(self):
        customgames.add("SPT")
        customgames.add("Minecraft")
        self.assertTrue(customgames.remove("spt"))  # case-insensitive
        self.assertEqual(customgames.names(), ["Minecraft"])
        self.assertFalse(customgames.remove("nope"))

    def test_scan_sorted_by_name(self):
        for title in ("Zomboid", "Apex", "SPT"):
            customgames.add(title)
        scanned_names = [game.name for game in customgames.scan()]
        self.assertEqual(scanned_names, ["Apex", "SPT", "Zomboid"])

    def test_command_and_logdir_stored_and_resolved(self):
        root = Path(self._tmp.name)
        log_folder = root / "logs"
        log_folder.mkdir()
        script = root / "tarkov.sh"
        script.write_text("#!/bin/sh\n")
        self.assertTrue(customgames.add("SPT", command=str(script), logdir=str(log_folder)))
        self.assertEqual(customgames.command("SPT"), [str(script)])
        self.assertEqual(customgames.log_dir("SPT"), str(log_folder))

    def test_logdir_inferred_from_sibling_logs(self):
        # A command with a sibling logs/ dir (SPT's layout) → logdir auto-detected.
        root = Path(self._tmp.name)
        script = root / "tarkov.sh"
        script.write_text("#!/bin/sh\n")
        (root / "logs").mkdir()
        self.assertTrue(customgames.add("SPT", command=str(script)))
        self.assertEqual(customgames.log_dir("SPT"), str(root / "logs"))

    def test_no_command_resolves_to_none(self):
        customgames.add("SPT")
        self.assertIsNone(customgames.command("SPT"))
        self.assertIsNone(customgames.command("missing"))
        self.assertIsNone(customgames.log_dir("SPT"))

    def test_corrupt_file_degrades_to_empty(self):
        self._file.parent.mkdir(parents=True, exist_ok=True)
        self._file.write_text("{not json")
        self.assertEqual(customgames.scan(), [])
        # and a subsequent add still works (overwrites the garbage)
        self.assertTrue(customgames.add("SPT"))
        self.assertEqual(customgames.names(), ["SPT"])
if __name__ == "__main__":
unittest.main()
+30
View File
@@ -47,6 +47,36 @@ class CollectTests(unittest.TestCase):
self.assertEqual(gamelogs.collect(), "")
class CustomGameLogTests(unittest.TestCase):
    """`gamelogs.collect(game=...)` should pull in a custom game's own log files."""

    def _log_dir(self) -> Path:
        """Create a temp directory that is removed again when the test finishes."""
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)  # mkdtemp() would leave the directory behind
        return Path(tmp.name)

    def test_collect_includes_custom_game_logs(self):
        tmp = self._log_dir()
        (tmp / "tarkov-latest.log").write_text(">>> Tarkov gone. clean exit")
        (tmp / "server-latest.log").write_text("SPT server error: mod failed to load")
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None), \
                mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)):
            out = gamelogs.collect(game="SPT")
        self.assertIn("SPT log", out)
        self.assertIn("server-latest.log", out)
        self.assertIn("mod failed to load", out)

    def test_custom_logs_skipped_when_stale(self):
        tmp = self._log_dir()
        old = tmp / "tarkov-latest.log"
        old.write_text("an earlier session")
        # Backdate the file an hour so it falls before the `since` cutoff below.
        old_mtime = time.time() - 3600
        os.utime(old, (old_mtime, old_mtime))
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None), \
                mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)):
            self.assertEqual(gamelogs.collect(since=time.time() - 60, game="SPT"), "")

    def test_no_game_means_no_custom_logs(self):
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None):
            self.assertEqual(gamelogs.collect(), "")  # game=None → custom lookup skipped
class SinceScopingTests(unittest.TestCase):
def test_since_filter_keeps_window_only(self):
text = (
+30
View File
@@ -11,11 +11,19 @@ from rigdoctor.core.health import (
WARNING,
check_displays,
check_memory_speed,
check_nvidia_module,
check_pcie_links,
run_health_checks,
scan_journal_text,
)
# A real no-Xid freeze: the open-module VA-space storm captured on 2026-05-29.
_VASPACE_LOG = """\
NVRM: nvCheckFailedNoLog: Check failed: 0 == (pMapNode->gpuMask & gpuMask) @ gpu_vaspace.c:4547
NVRM: dmaAllocMapping_GM107: can't update VA space for mapping @vaddr=0x4be00000
[drm:nv_drm_gem_alloc_nvkms_memory_ioctl [nvidia_drm]] *ERROR* Failed to allocate NVKMS memory for GEM object
"""
class HealthScanTests(unittest.TestCase):
def test_xid_79_is_critical(self):
@@ -44,6 +52,28 @@ class HealthScanTests(unittest.TestCase):
def test_clean_text_yields_no_findings(self):
self.assertEqual(scan_journal_text("usb 1-1: new high-speed USB device\nbluetooth: ok"), [])
def test_vaspace_freeze_detected_without_any_xid(self):
findings = scan_journal_text(_VASPACE_LOG)
gpu = [f for f in findings if f.category == "GPU"]
self.assertEqual(len(gpu), 1)
self.assertEqual(gpu[0].severity, WARNING)
self.assertIn("VA-space", gpu[0].title)
# It must NOT be misreported as an Xid finding (the log has no Xid at all).
self.assertNotIn("Xid", gpu[0].title)
self.assertIn("open kernel module", gpu[0].detail.lower())
def test_open_module_finding_when_open_loaded(self):
with mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=True):
findings = check_nvidia_module()
self.assertEqual(len(findings), 1)
self.assertEqual(findings[0].severity, INFO)
self.assertEqual(findings[0].category, "Driver")
def test_no_module_finding_when_proprietary_or_absent(self):
for state in (False, None):
with mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=state):
self.assertEqual(check_nvidia_module(), [])
def test_run_health_checks_returns_findings(self):
# Runs against the real system; just assert it returns a sorted list of Findings.
findings = run_health_checks()