From b1bc961b792dadbad5a3500f960415982d2c0d6f Mon Sep 17 00:00:00 2001 From: Jessey van Offeren Date: Fri, 29 May 2026 16:07:14 +0200 Subject: [PATCH 1/3] feat(health): detect no-Xid GPU freezes (open-module VA-space faults) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel-log scanner only caught Xid codes, OOM, panic, MCE, AER, thermal, and amdgpu resets — so a hard freeze that logs NO Xid slipped through entirely. Add detection for the NVIDIA open-kernel-module VA-space mapping fault (gpu_vaspace.c / dmaAllocMapping / NVKMS GEM-allocation failures), which can storm for minutes and end in a freeze without the GPU ever "falling off the bus". Also flag when the open kernel module (nvidia-*-open) is loaded — the context behind these faults — and add an AI-knowledge entry so the assistant distinguishes it from the Xid 79 hardware drop. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/rigdoctor/core/ai_knowledge.py | 8 ++++ src/rigdoctor/core/health.py | 73 ++++++++++++++++++++++++++++++ tests/test_health.py | 30 ++++++++++++ 3 files changed, 111 insertions(+) diff --git a/src/rigdoctor/core/ai_knowledge.py b/src/rigdoctor/core/ai_knowledge.py index 336fb1d..799c145 100644 --- a/src/rigdoctor/core/ai_knowledge.py +++ b/src/rigdoctor/core/ai_knowledge.py @@ -30,6 +30,14 @@ ENTRIES: list[tuple[tuple[str, ...], str]] = [ (("xid 8", "xid 62", "xid 63", "xid 64"), "These Xid codes commonly indicate VRAM/ECC or memory-training problems — suspect failing " "VRAM or an unstable memory overclock."), + (("va-space mapping", "gpu_vaspace", "dmaallocmapping", "nvkms memory for gem", + "open kernel module", "nvidia open"), + "NVIDIA open-kernel-module VA-space mapping errors (gpu_vaspace.c / dmaAllocMapping / " + "'Failed to allocate NVKMS memory for GEM object') are a driver-internal fault on the open " + "module (nvidia-*-open). They can storm for minutes and end in a HARD FREEZE with NO Xid " + "logged — so the GPU never 'falls off the bus', and this is distinct from the Xid 79 " + "hardware drop. Fix path: switch from the open to the proprietary NVIDIA kernel module and " + "update to the latest driver branch."), (("smart 197", "current_pending_sector", "pending sector"), "SMART 197 (Current Pending Sector) > 0 = sectors the drive can't read and is waiting to " "reallocate — early sign of a failing disk. Back up now and run an extended self-test."), diff --git a/src/rigdoctor/core/health.py b/src/rigdoctor/core/health.py index 9be430c..efadf2d 100644 --- a/src/rigdoctor/core/health.py +++ b/src/rigdoctor/core/health.py @@ -116,6 +116,31 @@ def scan_journal_text(text: str) -> list[Finding]: "Check power/thermals/driver; capture a session with `rigdoctor record`.", )) + # NVIDIA open-kernel-module VA-space mapping faults: a driver-internal failure that can + # storm for minutes and end in a HARD FREEZE with NO Xid logged — the GPU never "falls off + # the bus", so the Xid scan above misses it entirely. These code paths live in the open + # kernel module (nvidia-*-open); the proprietary module doesn't hit them. + nvrm_va = [ + ln for ln in lines + if "gpu_vaspace.c" in ln + or "_gvaspaceMappingInsert" in ln + or "dmaAllocMapping" in ln + or "NVKMS memory for GEM object" in ln + ] + if nvrm_va: + findings.append(Finding( + WARNING, "GPU", f"NVIDIA driver VA-space mapping errors ×{len(nvrm_va)}", + "The NVIDIA kernel module repeatedly failed to update the GPU's virtual address " + "space (gpu_vaspace / dmaAllocMapping assertions, NVKMS GEM-allocation failures). " + "This is a driver-internal fault that can recur for minutes and end in a hard freeze " + "with NO Xid logged — distinct from an Xid 79 hardware drop. These code paths are " + "specific to the open kernel module (nvidia-*-open).", + "If you're on the open module, switch to the proprietary NVIDIA driver " + "(install `nvidia-driver-###` instead of the `…-open` variant) and update to the " + "latest branch, then reboot. Capture a session with `rigdoctor record` to confirm " + "the errors precede the freeze.", + )) + return findings @@ -188,6 +213,53 @@ def check_nvidia_driver() -> list[Finding]: return [] +def _read_text(path: str) -> str | None: + try: + return Path(path).read_text() + except OSError: + return None + + +def _nvidia_module_is_open() -> bool | None: + """Whether the *loaded* NVIDIA kernel module is the open-source flavor. + + True = open (nvidia-*-open), False = proprietary, None = can't tell / no NVIDIA module. + /proc is authoritative for the loaded module and needs no external tool; modinfo's filename + (…/nvidia-###-open/nvidia.ko) is the fallback. + """ + proc = _read_text("/proc/driver/nvidia/version") + if proc: + low = proc.lower() + if "open kernel module" in low: + return True + if "kernel module" in low: # proprietary banner: "NVIDIA UNIX … Kernel Module …" + return False + if shutil.which("modinfo"): + try: + out = subprocess.run(["modinfo", "nvidia"], capture_output=True, text=True, timeout=10).stdout + except (subprocess.SubprocessError, OSError): + out = "" + for line in out.splitlines(): + if line.startswith("filename:"): + return "-open" in line + return None + + +def check_nvidia_module() -> list[Finding]: + """Note when the open-source NVIDIA kernel module is loaded — the context behind the no-Xid + VA-space freeze signature, which lives in the open module's code paths (suggestion-only).""" + if _nvidia_module_is_open() is not True: + return [] + return [Finding( + INFO, "Driver", "NVIDIA open kernel module in use", + "The loaded NVIDIA driver is the open-source kernel module (nvidia-*-open). It's fine for " + "most setups, but on some GeForce cards it hits driver-internal faults (VA-space mapping " + "errors, hard freezes with no Xid) that the proprietary module doesn't.", + "If you get unexplained hard freezes with no Xid in the logs, try the proprietary NVIDIA " + "driver (`nvidia-driver-###` rather than the `…-open` variant) on the latest branch.", + )] + + def _smart_devices() -> list[str]: try: proc = subprocess.run(["smartctl", "--scan"], capture_output=True, text=True, timeout=10) @@ -336,6 +408,7 @@ def run_health_checks(include_journal: bool = True) -> list[Finding]: findings: list[Finding] = [] findings += check_nvidia_driver() + findings += check_nvidia_module() if include_journal: findings += check_journal() findings += check_journal_persistence() diff --git a/tests/test_health.py b/tests/test_health.py index 4d6078d..a5d19e2 100644 --- a/tests/test_health.py +++ b/tests/test_health.py @@ -11,11 +11,19 @@ from rigdoctor.core.health import ( WARNING, check_displays, check_memory_speed, + check_nvidia_module, check_pcie_links, run_health_checks, scan_journal_text, ) +# A real no-Xid freeze: the open-module VA-space storm captured on 2026-05-29. +_VASPACE_LOG = """\ +NVRM: nvCheckFailedNoLog: Check failed: 0 == (pMapNode->gpuMask & gpuMask) @ gpu_vaspace.c:4547 +NVRM: dmaAllocMapping_GM107: can't update VA space for mapping @vaddr=0x4be00000 +[drm:nv_drm_gem_alloc_nvkms_memory_ioctl [nvidia_drm]] *ERROR* Failed to allocate NVKMS memory for GEM object +""" + class HealthScanTests(unittest.TestCase): def test_xid_79_is_critical(self): @@ -44,6 +52,28 @@ class HealthScanTests(unittest.TestCase): def test_clean_text_yields_no_findings(self): self.assertEqual(scan_journal_text("usb 1-1: new high-speed USB device\nbluetooth: ok"), []) + def test_vaspace_freeze_detected_without_any_xid(self): + findings = scan_journal_text(_VASPACE_LOG) + gpu = [f for f in findings if f.category == "GPU"] + self.assertEqual(len(gpu), 1) + self.assertEqual(gpu[0].severity, WARNING) + self.assertIn("VA-space", gpu[0].title) + # It must NOT be misreported as an Xid finding (the log has no Xid at all). + self.assertNotIn("Xid", gpu[0].title) + self.assertIn("open kernel module", gpu[0].detail.lower()) + + def test_open_module_finding_when_open_loaded(self): + with mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=True): + findings = check_nvidia_module() + self.assertEqual(len(findings), 1) + self.assertEqual(findings[0].severity, INFO) + self.assertEqual(findings[0].category, "Driver") + + def test_no_module_finding_when_proprietary_or_absent(self): + for state in (False, None): + with mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=state): + self.assertEqual(check_nvidia_module(), []) + def test_run_health_checks_returns_findings(self): # Runs against the real system; just assert it returns a sorted list of Findings. findings = run_health_checks() -- 2.52.0 From b9bfec961c5d6c04a469470ce35d030847854721 Mon Sep 17 00:00:00 2001 From: Jessey van Offeren Date: Fri, 29 May 2026 16:07:25 +0200 Subject: [PATCH 2/3] feat(games): manually add games (e.g. SPT) with launch + own logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some titles never show up in a Steam/Lutris/Heroic scan — standalone mod launchers like SPT (Single-Player Tarkov), itch.io downloads, hand-installed executables. Add a user-authored custom-games list (core/customgames.py) shown alongside the other sources in `rigdoctor games` and the GUI. Each entry can carry a launch command and a log directory: - `rigdoctor games add "SPT" --command .../tarkov.sh` (logs/ auto-detected) - `rigdoctor games play "SPT"` launches it under the crash-capture wrapper (wrap.run gains an explicit game-name override, since there's no SteamAppId) - the diagnostic now feeds the game's own logs to the analysis: gamelogs .collect(game=...) tails the registered log dir (SPT's server/launcher logs) alongside the kernel log, freshness-scoped by mtime. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/rigdoctor/cli.py | 62 +++++++++++++- src/rigdoctor/config.py | 3 + src/rigdoctor/core/customgames.py | 113 +++++++++++++++++++++++++ src/rigdoctor/core/diagstore.py | 2 +- src/rigdoctor/core/gamelogs.py | 37 +++++++- src/rigdoctor/core/wrap.py | 10 ++- src/rigdoctor/gui/diagnostic_dialog.py | 2 +- src/rigdoctor/gui/games_page.py | 26 +++++- tests/test_customgames.py | 85 +++++++++++++++++++ tests/test_gamelogs.py | 30 +++++++ 10 files changed, 360 insertions(+), 10 deletions(-) create mode 100644 src/rigdoctor/core/customgames.py create mode 100644 tests/test_customgames.py diff --git a/src/rigdoctor/cli.py b/src/rigdoctor/cli.py index 507b316..4b170bf 100644 --- a/src/rigdoctor/cli.py +++ b/src/rigdoctor/cli.py @@ -525,13 +525,13 @@ def cmd_gameenv(args) -> int: def cmd_games(args) -> int: from dataclasses import asdict - from .core import launchers, steam + from .core import customgames, launchers, steam selected = steam.selected_library_paths() result = steam.rescan() if selected else None steam_games = result.games if result else [] extra = launchers.scan() # non-Steam (Lutris/Heroic) - all_games = list(steam_games) + list(extra) + all_games = list(steam_games) + list(extra) + customgames.scan() # + user-added (SPT etc.) if args.json: print(json.dumps({ @@ -596,6 +596,50 @@ def cmd_games_libraries(args) -> int: return 0 +def cmd_games_add(args) -> int: + from .core import customgames + + if customgames.add(args.name, command=args.command, logdir=args.logdir): + print(f"Added '{args.name}' to your games (custom). It'll show in `rigdoctor games` " + "and the diagnostic game picker.") + entry = customgames.get(args.name) or {} + if entry.get("command"): + print(f" launch: {entry['command']} (run with: rigdoctor games play \"{args.name}\")") + if entry.get("logdir"): + print(f" logs: {entry['logdir']} (included in crash diagnostics)") + return 0 + print(f"'{args.name}' is blank or already in your custom games.") + return 1 + + +def cmd_games_play(args) -> int: + from .core import customgames, wrap + + command = customgames.command(args.name) + if command is None: + if customgames.get(args.name) is None: + print(f"'{args.name}' isn't in your custom games. Add it: " + f"rigdoctor games add \"{args.name}\" --command ") + else: + print(f"'{args.name}' has no launch command. Set one: " + f"rigdoctor games remove \"{args.name}\" && rigdoctor games add \"{args.name}\" " + "--command ") + return 1 + print(f"Launching '{args.name}' with crash-capture… (capture stops cleanly on exit; " + "a hard freeze is flagged next time you open RigDoctor)") + return wrap.run(command, game=args.name) + + +def cmd_games_remove(args) -> int: + from .core import customgames + + if customgames.remove(args.name): + print(f"Removed '{args.name}' from your custom games.") + return 0 + print(f"'{args.name}' isn't in your custom games. Current: {', '.join(customgames.names()) or '(none)'}") + return 1 + + def build_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( prog="rigdoctor", @@ -681,6 +725,20 @@ def build_parser() -> argparse.ArgumentParser: lib_p.add_argument("--json", action="store_true", help="output JSON") lib_p.set_defaults(func=cmd_games_libraries) + add_p = games_sub.add_parser("add", help="add a game no launcher reports (e.g. SPT)") + add_p.add_argument("name", help="game name, e.g. \"SPT\"") + add_p.add_argument("--command", default=None, + help="launch command/script (e.g. the path to tarkov.sh) — enables `games play`") + add_p.add_argument("--logdir", default=None, + help="the game's own log directory (auto-detected as /logs if present)") + add_p.set_defaults(func=cmd_games_add) + play_p = games_sub.add_parser("play", help="launch a custom game with crash-capture (e.g. SPT)") + play_p.add_argument("name", help="game name to launch") + play_p.set_defaults(func=cmd_games_play) + rm_p = games_sub.add_parser("remove", help="remove a previously added custom game") + rm_p.add_argument("name", help="game name to remove") + rm_p.set_defaults(func=cmd_games_remove) + env_p = sub.add_parser("gameenv", help="gaming environment checks (M6): flag stability/perf settings") env_p.add_argument("--json", action="store_true", help="output JSON instead of text") env_p.set_defaults(func=cmd_gameenv) diff --git a/src/rigdoctor/config.py b/src/rigdoctor/config.py index dc242fb..ce296fd 100644 --- a/src/rigdoctor/config.py +++ b/src/rigdoctor/config.py @@ -36,6 +36,9 @@ SPAWN_LOG = STATE_DIR / "recorder.out" # Gaming environment / game detection (M6) — cached Steam game scan (mutable state, # not config: refreshed by the background scan on every launch). GAMES_FILE = STATE_DIR / "games.json" +# User-added games that no launcher reports (e.g. SPT/standalone mod launchers). Authored +# by the user (not a refreshable cache), so it lives in DATA_DIR and persists across scans. +CUSTOM_GAMES_FILE = DATA_DIR / "custom-games.json" # Logging & reports (opt-in via `logging_enabled`). App log: rotating file of app events. # Each diagnostic is stored under DIAGNOSTICS_DIR//; "Report" zips one into REPORTS_DIR. diff --git a/src/rigdoctor/core/customgames.py b/src/rigdoctor/core/customgames.py new file mode 100644 index 0000000..51f01e0 --- /dev/null +++ b/src/rigdoctor/core/customgames.py @@ -0,0 +1,113 @@ +"""User-added games (M6): a manual list for titles no launcher reports. + +Some games never show up in a Steam/Lutris/Heroic scan — standalone mod launchers like +**SPT** (Single-Player Tarkov), itch.io downloads, or any hand-installed executable. This +module keeps a small user-authored list so those still appear in the game list and can be +picked for a focused diagnostic, in the same `steam.Game` shape as every other source. + +Each entry is a name plus two optionals: a **launch command** (so `rigdoctor games play` +can start it under the auto-capture wrapper) and a **log directory** (so a crash diagnostic +can read the game's own logs — e.g. SPT's `logs/tarkov-latest.log`). Stored as JSON in +`config.CUSTOM_GAMES_FILE`; stdlib only; every reader degrades to [] on a missing/bad file. +""" + +from __future__ import annotations + +import json +import os +import shlex + +from .. import config +from .steam import Game + +LAUNCHER = "custom" + + +def _load() -> list[dict]: + try: + data = json.loads(config.CUSTOM_GAMES_FILE.read_text()) + except (OSError, ValueError): + return [] + games = data.get("games") if isinstance(data, dict) else None + return [g for g in games if isinstance(g, dict) and g.get("name")] if isinstance(games, list) else [] + + +def _save(games: list[dict]) -> None: + config.CUSTOM_GAMES_FILE.parent.mkdir(parents=True, exist_ok=True) + config.CUSTOM_GAMES_FILE.write_text(json.dumps({"games": games}, indent=2, ensure_ascii=False) + "\n") + + +def names() -> list[str]: + """Just the stored names (insertion order preserved).""" + return [str(g["name"]) for g in _load()] + + +def get(name: str) -> dict | None: + """The stored entry (name + optional command/logdir) for a game, case-insensitive.""" + name = (name or "").strip().lower() + return next((g for g in _load() if str(g["name"]).lower() == name), None) + + +def add(name: str, command: str | None = None, logdir: str | None = None) -> bool: + """Add a game by name, with an optional launch command and log directory. + + Returns False if the name is blank or already present (case-insensitive). When a command + is given but no logdir, a sibling `logs/` dir is inferred if it exists (covers SPT's layout). + """ + name = (name or "").strip() + if not name: + return False + if get(name): + return False + entry: dict = {"name": name} + command = (command or "").strip() + if command: + entry["command"] = command + if not logdir: + sibling = os.path.join(os.path.dirname(_argv0(command)), "logs") + if os.path.isdir(sibling): + logdir = sibling + logdir = (logdir or "").strip() + if logdir: + entry["logdir"] = os.path.expanduser(logdir) + games = _load() + games.append(entry) + _save(games) + return True + + +def remove(name: str) -> bool: + """Remove a game by name (case-insensitive). Returns True if one was removed.""" + name = (name or "").strip().lower() + games = _load() + kept = [g for g in games if str(g["name"]).lower() != name] + if len(kept) == len(games): + return False + _save(kept) + return True + + +def _argv0(command: str) -> str: + parts = shlex.split(command) + return parts[0] if parts else command + + +def command(name: str) -> list[str] | None: + """The launch argv for a game (shlex-split), or None if it has no command.""" + entry = get(name) + cmd = (entry or {}).get("command") + return shlex.split(cmd) if cmd else None + + +def log_dir(name: str) -> str | None: + """The game's own log directory, or None if it isn't set / doesn't exist.""" + entry = get(name) + path = (entry or {}).get("logdir") + return path if path and os.path.isdir(path) else None + + +def scan() -> list[Game]: + """User-added games as `Game` objects (launcher='custom'), sorted by name.""" + out = [Game(appid="", name=str(g["name"]), library="", installdir="", launcher=LAUNCHER) + for g in _load()] + return sorted(out, key=lambda g: g.name.lower()) diff --git a/src/rigdoctor/core/diagstore.py b/src/rigdoctor/core/diagstore.py index f883182..ea9c36b 100644 --- a/src/rigdoctor/core/diagstore.py +++ b/src/rigdoctor/core/diagstore.py @@ -75,7 +75,7 @@ def store(result, capture_path=None, since: float | None = None) -> Path | None: _write(target / "report.txt", "\n".join(report)) try: - logs = gamelogs.collect(since=since) + logs = gamelogs.collect(since=since, game=getattr(result, "game", None)) if logs: _write(target / "gamelogs.txt", logs) except OSError: diff --git a/src/rigdoctor/core/gamelogs.py b/src/rigdoctor/core/gamelogs.py index 57ba2c5..f39922c 100644 --- a/src/rigdoctor/core/gamelogs.py +++ b/src/rigdoctor/core/gamelogs.py @@ -81,15 +81,48 @@ def available() -> bool: return bool(_proton_logs() or _steam_console()) -def collect(since: float | None = None, max_bytes: int = 8000) -> str: - """Recent Proton + Steam log tails as one labelled text block ('' if none). +def _custom_game_logs(game: str, since: float | None, max_bytes: int) -> list[str]: + """Tail the recent ``*.log`` files in a custom game's own log dir (e.g. SPT's + ``logs/tarkov-latest.log`` + ``server-latest.log``), newest first, freshness-scoped by mtime. + + Custom-game logs use their own timestamp formats, so we scope by file mtime (like the Proton + log) rather than the ``[YYYY-MM-DD …]`` line filter used for the Steam console. + """ + from . import customgames + + directory = customgames.log_dir(game) + if not directory: + return [] + try: + files = [p for p in Path(directory).glob("*.log") if p.is_file()] + except OSError: + return [] + files.sort(key=_mtime, reverse=True) + sections: list[str] = [] + for log in files[:4]: # a session touches a handful (tarkov/server/launcher latest) + if since is not None and _mtime(log) < since: + continue + tail = _tail(log, max_bytes).strip() + if tail: + sections.append(f"--- {game} log ({log.name}) ---\n{tail}") + return sections + + +def collect(since: float | None = None, max_bytes: int = 8000, game: str | None = None) -> str: + """Recent Proton + Steam (+ custom-game) log tails as one labelled text block ('' if none). With ``since`` (epoch), scope to that session: skip a Proton log not written during/after the session (a stale per-app log from an earlier game), and keep only Steam-console lines timestamped at/after ``since`` — so we don't feed the model an unrelated past session. + + ``game`` (the diagnostic's focused title) pulls in that custom game's own logs if it has a + registered log dir — e.g. SPT's server/launcher logs, which Steam/Proton never see. """ sections: list[str] = [] + if game: + sections += _custom_game_logs(game, since, max_bytes) + protons = _proton_logs() if protons: log = protons[0] diff --git a/src/rigdoctor/core/wrap.py b/src/rigdoctor/core/wrap.py index 84898d9..2a3427d 100644 --- a/src/rigdoctor/core/wrap.py +++ b/src/rigdoctor/core/wrap.py @@ -40,16 +40,20 @@ def launch_option() -> str: return f"{quoted} wrap %command%" -def run(command: list[str]) -> int: +def run(command: list[str], game: str | None = None) -> int: """Start a focused capture (unless one's already running), run the game, then stop it. - Returns the game's exit code so Steam sees the right status.""" + Returns the game's exit code so Steam sees the right status. + + `game` overrides name detection — used by `games play` for a custom game (e.g. SPT), where + there's no SteamAppId and the bare script name (tarkov.sh) wouldn't tag the capture usefully. + """ from . import diagnostic, reccontrol if not command: print("usage: rigdoctor wrap %command% (set as a Steam launch option)", file=sys.stderr) return 2 - game = game_name_from_env() or os.path.basename(command[0]) + game = game or game_name_from_env() or os.path.basename(command[0]) started = False if not reccontrol.running_pid(): # don't disturb an existing capture started = diagnostic.start(game=game) is not None diff --git a/src/rigdoctor/gui/diagnostic_dialog.py b/src/rigdoctor/gui/diagnostic_dialog.py index 0e1fa86..96c3603 100644 --- a/src/rigdoctor/gui/diagnostic_dialog.py +++ b/src/rigdoctor/gui/diagnostic_dialog.py @@ -143,7 +143,7 @@ class DiagnosticDialog(QDialog): lines.append("\nCapture summary:\n" + render_summary(summary)) since = (summary.start - 60) if summary.start else None - logs = gamelogs.collect(since=since) # scoped to this session + logs = gamelogs.collect(since=since, game=result.game) # scoped to this session if logs: lines.append("\nGame/Proton/Steam logs for this session:\n" + logs) sys_logs = syslogs.collect(since=since) # kernel log + crashed-process records diff --git a/src/rigdoctor/gui/games_page.py b/src/rigdoctor/gui/games_page.py index 2ef3b8a..1c95ed1 100644 --- a/src/rigdoctor/gui/games_page.py +++ b/src/rigdoctor/gui/games_page.py @@ -115,6 +115,10 @@ class GamesPage(QWidget): self._autocap_btn = QPushButton("Auto-capture…") self._autocap_btn.clicked.connect(self._show_autocapture) header.addWidget(self._autocap_btn) + # Add a game no launcher reports (e.g. SPT / standalone mod launchers). + self._add_btn = QPushButton("Add game…") + self._add_btn.clicked.connect(self._add_custom_game) + header.addWidget(self._add_btn) self._rescan_btn = QPushButton("Rescan") self._rescan_btn.setObjectName("PrimaryButton") self._rescan_btn.clicked.connect(self.refresh) @@ -235,7 +239,9 @@ class GamesPage(QWidget): ] self._libraries_ready.emit(libs) try: - self._extra_games = launchers.scan() # Lutris / Heroic (non-Steam) + from ..core import customgames + # non-Steam: Lutris/Heroic + user-added games (SPT etc.) + self._extra_games = list(launchers.scan()) + customgames.scan() except Exception: self._extra_games = [] self._scanned.emit(steam.rescan()) @@ -423,6 +429,24 @@ class GamesPage(QWidget): reccontrol.stop_background() self._banner.hide() + def _add_custom_game(self) -> None: + """Manually add a game no launcher reports (e.g. SPT), then rescan to show it.""" + from PySide6.QtWidgets import QInputDialog + + from ..core import customgames + + name, ok = QInputDialog.getText( + self, "Add game", "Game name (e.g. SPT) — for titles no launcher reports:") + if not ok: + return + name = name.strip() + if not name: + return + if customgames.add(name): + self.refresh() + else: + QMessageBox.information(self, "Add game", f"'{name}' is already in your games.") + def _show_autocapture(self) -> None: from ..core import wrap diff --git a/tests/test_customgames.py b/tests/test_customgames.py new file mode 100644 index 0000000..e7a94a3 --- /dev/null +++ b/tests/test_customgames.py @@ -0,0 +1,85 @@ +"""Tests for user-added games (M6): add/remove/scan of titles no launcher reports (e.g. SPT).""" + +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +from rigdoctor.core import customgames + + +class CustomGamesTests(unittest.TestCase): + def setUp(self): + self._tmp = tempfile.TemporaryDirectory() + self._file = Path(self._tmp.name) / "custom-games.json" + self._patch = mock.patch.object(customgames.config, "CUSTOM_GAMES_FILE", self._file) + self._patch.start() + + def tearDown(self): + self._patch.stop() + self._tmp.cleanup() + + def test_missing_file_scans_empty(self): + self.assertEqual(customgames.scan(), []) + self.assertEqual(customgames.names(), []) + + def test_add_then_scan_returns_game(self): + self.assertTrue(customgames.add("SPT")) + games = customgames.scan() + self.assertEqual(len(games), 1) + self.assertEqual(games[0].name, "SPT") + self.assertEqual(games[0].launcher, "custom") + self.assertTrue(self._file.exists()) # persisted + + def test_add_is_idempotent_case_insensitive(self): + self.assertTrue(customgames.add("SPT")) + self.assertFalse(customgames.add("spt")) # already present + self.assertFalse(customgames.add(" ")) # blank + self.assertEqual(customgames.names(), ["SPT"]) + + def test_remove(self): + customgames.add("SPT") + customgames.add("Minecraft") + self.assertTrue(customgames.remove("spt")) # case-insensitive + self.assertEqual(customgames.names(), ["Minecraft"]) + self.assertFalse(customgames.remove("nope")) + + def test_scan_sorted_by_name(self): + for n in ("Zomboid", "Apex", "SPT"): + customgames.add(n) + self.assertEqual([g.name for g in customgames.scan()], ["Apex", "SPT", "Zomboid"]) + + def test_command_and_logdir_stored_and_resolved(self): + logs = Path(self._tmp.name) / "logs" + logs.mkdir() + sh = Path(self._tmp.name) / "tarkov.sh" + sh.write_text("#!/bin/sh\n") + self.assertTrue(customgames.add("SPT", command=str(sh), logdir=str(logs))) + self.assertEqual(customgames.command("SPT"), [str(sh)]) + self.assertEqual(customgames.log_dir("SPT"), str(logs)) + + def test_logdir_inferred_from_sibling_logs(self): + # A command with a sibling logs/ dir (SPT's layout) → logdir auto-detected. + sh = Path(self._tmp.name) / "tarkov.sh" + sh.write_text("#!/bin/sh\n") + (Path(self._tmp.name) / "logs").mkdir() + self.assertTrue(customgames.add("SPT", command=str(sh))) + self.assertEqual(customgames.log_dir("SPT"), str(Path(self._tmp.name) / "logs")) + + def test_no_command_resolves_to_none(self): + customgames.add("SPT") + self.assertIsNone(customgames.command("SPT")) + self.assertIsNone(customgames.command("missing")) + self.assertIsNone(customgames.log_dir("SPT")) + + def test_corrupt_file_degrades_to_empty(self): + self._file.parent.mkdir(parents=True, exist_ok=True) + self._file.write_text("{not json") + self.assertEqual(customgames.scan(), []) + # and a subsequent add still works (overwrites the garbage) + self.assertTrue(customgames.add("SPT")) + self.assertEqual(customgames.names(), ["SPT"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_gamelogs.py b/tests/test_gamelogs.py index 687e5e1..d444fa3 100644 --- a/tests/test_gamelogs.py +++ b/tests/test_gamelogs.py @@ -47,6 +47,36 @@ class CollectTests(unittest.TestCase): self.assertEqual(gamelogs.collect(), "") +class CustomGameLogTests(unittest.TestCase): + def test_collect_includes_custom_game_logs(self): + tmp = Path(tempfile.mkdtemp()) + (tmp / "tarkov-latest.log").write_text(">>> Tarkov gone. clean exit") + (tmp / "server-latest.log").write_text("SPT server error: mod failed to load") + with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \ + mock.patch.object(gamelogs, "_steam_console", return_value=None), \ + mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)): + out = gamelogs.collect(game="SPT") + self.assertIn("SPT log", out) + self.assertIn("server-latest.log", out) + self.assertIn("mod failed to load", out) + + def test_custom_logs_skipped_when_stale(self): + tmp = Path(tempfile.mkdtemp()) + old = tmp / "tarkov-latest.log" + old.write_text("an earlier session") + old_mtime = time.time() - 3600 + os.utime(old, (old_mtime, old_mtime)) + with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \ + mock.patch.object(gamelogs, "_steam_console", return_value=None), \ + mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)): + self.assertEqual(gamelogs.collect(since=time.time() - 60, game="SPT"), "") + + def test_no_game_means_no_custom_logs(self): + with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \ + mock.patch.object(gamelogs, "_steam_console", return_value=None): + self.assertEqual(gamelogs.collect(), "") # game=None → custom lookup skipped + + class SinceScopingTests(unittest.TestCase): def test_since_filter_keeps_window_only(self): text = ( -- 2.52.0 From 0f9cb4b68469bb2957fca1db4269b582bba88214 Mon Sep 17 00:00:00 2001 From: Jessey van Offeren Date: Fri, 29 May 2026 16:09:02 +0200 Subject: [PATCH 3/3] chore(release): v0.42.0 Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 20 ++++++++++++++++++++ pyproject.toml | 2 +- src/rigdoctor/__init__.py | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7035ed..b6e6c5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ All notable changes to RigDoctor are recorded here. Format follows (`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git release tag (so the auto-updater, D18, can compare versions). +## [0.42.0] - 2026-05-29 +### Added +- **Detect hard freezes that log no Xid.** The kernel-log scanner caught Xid codes, OOM, panic, + MCE, PCIe AER, thermal events, and amdgpu resets — but a crash that logs *no* Xid slipped + through. It now flags the NVIDIA open-kernel-module **VA-space mapping fault** (`gpu_vaspace.c` + / `dmaAllocMapping` assertions, NVKMS GEM-allocation failures) — a driver-internal error that + can storm for minutes and end in a freeze without the GPU ever "falling off the bus" (distinct + from Xid 79). A new `check_nvidia_module()` notes when the open module (`nvidia-*-open`) is + loaded — the context behind these faults — and a new `ai_knowledge` entry lets the assistant + tell the no-Xid freeze apart from the Xid 79 hardware drop. +- **Add games no launcher reports (e.g. SPT).** A user-authored custom-games list + (`core/customgames.py`) shows alongside Steam/Lutris/Heroic in `rigdoctor games` and the GUI + ("Add game…"), for standalone mod launchers (Single-Player Tarkov), itch.io downloads, or any + hand-installed game. Each entry can carry a launch command and a log directory: + `rigdoctor games add "SPT" --command .../tarkov.sh` (a sibling `logs/` is auto-detected), + `rigdoctor games play "SPT"` launches it under the crash-capture wrapper (tagged with the real + name, not the script's), and the diagnostic now tails the game's *own* logs — SPT's + server/launcher logs — alongside the kernel log so the analysis sees what the game logged + before the freeze. + ## [0.41.0] - 2026-05-25 ### Added - **Import a crash dump (`.dmp`) and explain it with AI.** The **Games** page gains an diff --git a/pyproject.toml b/pyproject.toml index 68e09ee..376253b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rigdoctor" -version = "0.41.0" +version = "0.42.0" description = "Modular hardware monitoring & crash diagnostics for Linux gamers." readme = "README.md" requires-python = ">=3.11" diff --git a/src/rigdoctor/__init__.py b/src/rigdoctor/__init__.py index d60de26..2dcfd8b 100644 --- a/src/rigdoctor/__init__.py +++ b/src/rigdoctor/__init__.py @@ -1,3 +1,3 @@ """RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers.""" -__version__ = "0.41.0" +__version__ = "0.42.0" -- 2.52.0