feat(health): detect no-Xid GPU freezes (open-module VA-space faults) #46

Merged
jessey merged 4 commits from feat/gpu-vaspace-spt into main 2026-05-29 14:10:59 +00:00
10 changed files with 360 additions and 10 deletions
Showing only changes of commit b9bfec961c - Show all commits
+60 -2
View File
@@ -525,13 +525,13 @@ def cmd_gameenv(args) -> int:
def cmd_games(args) -> int:
from dataclasses import asdict
from .core import launchers, steam
from .core import customgames, launchers, steam
selected = steam.selected_library_paths()
result = steam.rescan() if selected else None
steam_games = result.games if result else []
extra = launchers.scan() # non-Steam (Lutris/Heroic)
all_games = list(steam_games) + list(extra)
all_games = list(steam_games) + list(extra) + customgames.scan() # + user-added (SPT etc.)
if args.json:
print(json.dumps({
@@ -596,6 +596,50 @@ def cmd_games_libraries(args) -> int:
return 0
def cmd_games_add(args) -> int:
    """CLI handler for `games add`: register a user-added game.

    Reads ``args.name``, ``args.command`` and ``args.logdir``. Returns 0 after a
    successful add (echoing the stored launch command / log dir when present),
    or 1 when `customgames.add` declines (blank or duplicate name).
    """
    from .core import customgames

    added = customgames.add(args.name, command=args.command, logdir=args.logdir)
    if not added:
        print(f"'{args.name}' is blank or already in your custom games.")
        return 1

    print(f"Added '{args.name}' to your games (custom). It'll show in `rigdoctor games` "
          "and the diagnostic game picker.")
    # Re-read the stored entry so the echo reflects what was actually saved
    # (e.g. an auto-detected log dir), not just what was passed in.
    stored = customgames.get(args.name) or {}
    launch = stored.get("command")
    if launch:
        print(f" launch: {launch} (run with: rigdoctor games play \"{args.name}\")")
    logs = stored.get("logdir")
    if logs:
        print(f" logs: {logs} (included in crash diagnostics)")
    return 0
def cmd_games_play(args) -> int:
    """CLI handler for `games play`: launch a custom game under the capture wrapper.

    Returns whatever `wrap.run` returns when the game has a launch command;
    otherwise prints how to fix the entry (unknown game vs. no command) and
    returns 1.
    """
    from .core import customgames, wrap

    argv = customgames.command(args.name)
    if argv is not None:
        print(f"Launching '{args.name}' with crash-capture… (capture stops cleanly on exit; "
              "a hard freeze is flagged next time you open RigDoctor)")
        return wrap.run(argv, game=args.name)

    # No launch argv: tell an unknown game apart from a known one lacking a command.
    if customgames.get(args.name) is None:
        print(f"'{args.name}' isn't in your custom games. Add it: "
              f"rigdoctor games add \"{args.name}\" --command <launch script>")
    else:
        print(f"'{args.name}' has no launch command. Set one: "
              f"rigdoctor games remove \"{args.name}\" && rigdoctor games add \"{args.name}\" "
              "--command <launch script>")
    return 1
def cmd_games_remove(args) -> int:
    """CLI handler for `games remove`: drop a user-added game by name.

    Returns 0 when an entry was removed; otherwise prints the current custom
    game names (or "(none)") and returns 1.
    """
    from .core import customgames

    if not customgames.remove(args.name):
        remaining = ', '.join(customgames.names()) or '(none)'
        print(f"'{args.name}' isn't in your custom games. Current: {remaining}")
        return 1

    print(f"Removed '{args.name}' from your custom games.")
    return 0
def build_parser() -> argparse.ArgumentParser:
p = argparse.ArgumentParser(
prog="rigdoctor",
@@ -681,6 +725,20 @@ def build_parser() -> argparse.ArgumentParser:
lib_p.add_argument("--json", action="store_true", help="output JSON")
lib_p.set_defaults(func=cmd_games_libraries)
add_p = games_sub.add_parser("add", help="add a game no launcher reports (e.g. SPT)")
add_p.add_argument("name", help="game name, e.g. \"SPT\"")
add_p.add_argument("--command", default=None,
help="launch command/script (e.g. the path to tarkov.sh) — enables `games play`")
add_p.add_argument("--logdir", default=None,
help="the game's own log directory (auto-detected as <command dir>/logs if present)")
add_p.set_defaults(func=cmd_games_add)
play_p = games_sub.add_parser("play", help="launch a custom game with crash-capture (e.g. SPT)")
play_p.add_argument("name", help="game name to launch")
play_p.set_defaults(func=cmd_games_play)
rm_p = games_sub.add_parser("remove", help="remove a previously added custom game")
rm_p.add_argument("name", help="game name to remove")
rm_p.set_defaults(func=cmd_games_remove)
env_p = sub.add_parser("gameenv", help="gaming environment checks (M6): flag stability/perf settings")
env_p.add_argument("--json", action="store_true", help="output JSON instead of text")
env_p.set_defaults(func=cmd_gameenv)
+3
View File
@@ -36,6 +36,9 @@ SPAWN_LOG = STATE_DIR / "recorder.out"
# Gaming environment / game detection (M6) — cached Steam game scan (mutable state,
# not config: refreshed by the background scan on every launch).
GAMES_FILE = STATE_DIR / "games.json"
# User-added games that no launcher reports (e.g. SPT/standalone mod launchers). Authored
# by the user (not a refreshable cache), so it lives in DATA_DIR and persists across scans.
CUSTOM_GAMES_FILE = DATA_DIR / "custom-games.json"
# Logging & reports (opt-in via `logging_enabled`). App log: rotating file of app events.
# Each diagnostic is stored under DIAGNOSTICS_DIR/<id>/; "Report" zips one into REPORTS_DIR.
+113
View File
@@ -0,0 +1,113 @@
"""User-added games (M6): a manual list for titles no launcher reports.
Some games never show up in a Steam/Lutris/Heroic scan — standalone mod launchers like
**SPT** (Single-Player Tarkov), itch.io downloads, or any hand-installed executable. This
module keeps a small user-authored list so those still appear in the game list and can be
picked for a focused diagnostic, in the same `steam.Game` shape as every other source.
Each entry is a name plus two optionals: a **launch command** (so `rigdoctor games play`
can start it under the auto-capture wrapper) and a **log directory** (so a crash diagnostic
can read the game's own logs — e.g. SPT's `logs/tarkov-latest.log`). Stored as JSON in
`config.CUSTOM_GAMES_FILE`; stdlib only; every reader degrades to [] on a missing/bad file.
"""
from __future__ import annotations
import json
import os
import shlex
from .. import config
from .steam import Game
LAUNCHER = "custom"
def _load() -> list[dict]:
    """Read the stored custom-game entries, degrading to [] on any problem.

    Returns only dict entries with a truthy ``name``. A missing/unreadable file,
    invalid JSON, undecodable bytes, or an unexpected top-level shape all yield [].
    """
    try:
        # Explicit UTF-8: must match _save, which writes raw non-ASCII characters.
        # UnicodeDecodeError is a ValueError subclass, so bad bytes also degrade to [].
        data = json.loads(config.CUSTOM_GAMES_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return []
    games = data.get("games") if isinstance(data, dict) else None
    if not isinstance(games, list):
        return []
    return [g for g in games if isinstance(g, dict) and g.get("name")]


def _save(games: list[dict]) -> None:
    """Write the entries to `config.CUSTOM_GAMES_FILE` as ``{"games": [...]}`` JSON.

    Creates the parent directory if needed. The file is always written as UTF-8
    because ``ensure_ascii=False`` emits non-ASCII names verbatim, which a
    non-UTF-8 locale default encoding could fail to encode.
    """
    config.CUSTOM_GAMES_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps({"games": games}, indent=2, ensure_ascii=False) + "\n"
    config.CUSTOM_GAMES_FILE.write_text(payload, encoding="utf-8")
def names() -> list[str]:
    """Return the stored game names as strings, in the order they are stored."""
    stored = _load()
    return [str(entry["name"]) for entry in stored]
def get(name: str) -> dict | None:
    """Look up a stored entry by name, ignoring case and surrounding whitespace.

    Returns the first matching entry dict (name + optional command/logdir),
    or None when nothing matches.
    """
    wanted = (name or "").strip().lower()
    for entry in _load():
        if str(entry["name"]).lower() == wanted:
            return entry
    return None
def add(name: str, command: str | None = None, logdir: str | None = None) -> bool:
    """Add a game by name, with an optional launch command and log directory.

    Returns False if the name is blank or already present (case-insensitive),
    True once the entry has been saved. When a command is given but no logdir
    (None, empty, or whitespace-only), a ``logs/`` directory next to the
    command's executable is used if it exists (covers SPT's layout). A leading
    ``~`` in the command path is expanded for that check, the same way an
    explicit logdir is expanded before being stored.

    Raises ValueError if the log dir has to be inferred and the command cannot
    be shlex-split (e.g. an unbalanced quote).
    """
    name = (name or "").strip()
    if not name:
        return False
    if get(name):
        return False

    entry: dict = {"name": name}
    command = (command or "").strip()
    # Strip before the inference check so a whitespace-only logdir counts as "not given".
    logdir = (logdir or "").strip()
    if command:
        entry["command"] = command
        if not logdir:
            launcher = os.path.expanduser(_argv0(command))
            sibling = os.path.join(os.path.dirname(launcher), "logs")
            if os.path.isdir(sibling):
                logdir = sibling
    if logdir:
        entry["logdir"] = os.path.expanduser(logdir)

    games = _load()
    games.append(entry)
    _save(games)
    return True
def remove(name: str) -> bool:
    """Delete every stored entry whose name matches (case-insensitive).

    Returns True if at least one entry was dropped and the file rewritten;
    False (without writing) when nothing matched.
    """
    target = (name or "").strip().lower()
    current = _load()
    survivors = [entry for entry in current if str(entry["name"]).lower() != target]
    removed_any = len(survivors) != len(current)
    if removed_any:
        _save(survivors)
    return removed_any
def _argv0(command: str) -> str:
parts = shlex.split(command)
return parts[0] if parts else command
def command(name: str) -> list[str] | None:
    """The launch argv for a game (shlex-split), or None if it has no usable command.

    None covers: unknown game, no stored command, a stored command that is not
    a string (hand-edited file), and a command that cannot be parsed (e.g. an
    unbalanced quote) — so callers get a clean "no command" result instead of
    an exception from a bad stored value.
    """
    entry = get(name)
    cmd = (entry or {}).get("command")
    if not cmd or not isinstance(cmd, str):
        return None
    try:
        return shlex.split(cmd)
    except ValueError:
        # shlex raises ValueError on malformed quoting; treat as "no launchable command".
        return None
def log_dir(name: str) -> str | None:
    """Return the game's stored log directory, but only if it exists on disk.

    None when the game is unknown, has no ``logdir``, or the path is not a directory.
    """
    entry = get(name) or {}
    path = entry.get("logdir")
    if path and os.path.isdir(path):
        return path
    return None
def scan() -> list[Game]:
    """Return the user-added games as `Game` objects, ordered by lowercased name.

    Each `Game` carries only the name and ``launcher=LAUNCHER``; appid, library
    and installdir are empty strings.
    """
    found = [
        Game(appid="", name=str(entry["name"]), library="", installdir="", launcher=LAUNCHER)
        for entry in _load()
    ]
    found.sort(key=lambda game: game.name.lower())
    return found
+1 -1
View File
@@ -75,7 +75,7 @@ def store(result, capture_path=None, since: float | None = None) -> Path | None:
_write(target / "report.txt", "\n".join(report))
try:
logs = gamelogs.collect(since=since)
logs = gamelogs.collect(since=since, game=getattr(result, "game", None))
if logs:
_write(target / "gamelogs.txt", logs)
except OSError:
+35 -2
View File
@@ -81,15 +81,48 @@ def available() -> bool:
return bool(_proton_logs() or _steam_console())
def collect(since: float | None = None, max_bytes: int = 8000) -> str:
"""Recent Proton + Steam log tails as one labelled text block ('' if none).
def _custom_game_logs(game: str, since: float | None, max_bytes: int) -> list[str]:
    """Collect labelled tails of a custom game's own ``*.log`` files.

    Looks in the log dir registered for ``game`` (e.g. SPT's ``logs/`` with
    ``tarkov-latest.log`` + ``server-latest.log``), takes the four most recently
    modified files, and drops any whose mtime is older than ``since``. Scoping is
    by file mtime (as for the Proton log) because custom-game logs use their own
    timestamp formats, unlike the ``[YYYY-MM-DD …]`` lines of the Steam console.

    Returns one ``--- <game> log (<file>) ---`` section per non-empty tail, newest
    first; [] when there is no log dir or it cannot be listed.
    """
    from . import customgames

    directory = customgames.log_dir(game)
    if not directory:
        return []
    try:
        candidates = [p for p in Path(directory).glob("*.log") if p.is_file()]
    except OSError:
        return []

    newest_first = sorted(candidates, key=_mtime, reverse=True)
    sections: list[str] = []
    # A session touches only a handful of files (tarkov/server/launcher latest).
    for log in newest_first[:4]:
        stale = since is not None and _mtime(log) < since
        if stale:
            continue
        tail = _tail(log, max_bytes).strip()
        if not tail:
            continue
        sections.append(f"--- {game} log ({log.name}) ---\n{tail}")
    return sections
def collect(since: float | None = None, max_bytes: int = 8000, game: str | None = None) -> str:
"""Recent Proton + Steam (+ custom-game) log tails as one labelled text block ('' if none).
With ``since`` (epoch), scope to that session: skip a Proton log not written during/after
the session (a stale per-app log from an earlier game), and keep only Steam-console lines
timestamped at/after ``since`` — so we don't feed the model an unrelated past session.
``game`` (the diagnostic's focused title) pulls in that custom game's own logs if it has a
registered log dir — e.g. SPT's server/launcher logs, which Steam/Proton never see.
"""
sections: list[str] = []
if game:
sections += _custom_game_logs(game, since, max_bytes)
protons = _proton_logs()
if protons:
log = protons[0]
+7 -3
View File
@@ -40,16 +40,20 @@ def launch_option() -> str:
return f"{quoted} wrap %command%"
def run(command: list[str]) -> int:
def run(command: list[str], game: str | None = None) -> int:
"""Start a focused capture (unless one's already running), run the game, then stop it.
Returns the game's exit code so Steam sees the right status."""
Returns the game's exit code so Steam sees the right status.
`game` overrides name detection — used by `games play` for a custom game (e.g. SPT), where
there's no SteamAppId and the bare script name (tarkov.sh) wouldn't tag the capture usefully.
"""
from . import diagnostic, reccontrol
if not command:
print("usage: rigdoctor wrap %command% (set as a Steam launch option)", file=sys.stderr)
return 2
game = game_name_from_env() or os.path.basename(command[0])
game = game or game_name_from_env() or os.path.basename(command[0])
started = False
if not reccontrol.running_pid(): # don't disturb an existing capture
started = diagnostic.start(game=game) is not None
+1 -1
View File
@@ -143,7 +143,7 @@ class DiagnosticDialog(QDialog):
lines.append("\nCapture summary:\n" + render_summary(summary))
since = (summary.start - 60) if summary.start else None
logs = gamelogs.collect(since=since) # scoped to this session
logs = gamelogs.collect(since=since, game=result.game) # scoped to this session
if logs:
lines.append("\nGame/Proton/Steam logs for this session:\n" + logs)
sys_logs = syslogs.collect(since=since) # kernel log + crashed-process records
+25 -1
View File
@@ -115,6 +115,10 @@ class GamesPage(QWidget):
self._autocap_btn = QPushButton("Auto-capture…")
self._autocap_btn.clicked.connect(self._show_autocapture)
header.addWidget(self._autocap_btn)
# Add a game no launcher reports (e.g. SPT / standalone mod launchers).
self._add_btn = QPushButton("Add game…")
self._add_btn.clicked.connect(self._add_custom_game)
header.addWidget(self._add_btn)
self._rescan_btn = QPushButton("Rescan")
self._rescan_btn.setObjectName("PrimaryButton")
self._rescan_btn.clicked.connect(self.refresh)
@@ -235,7 +239,9 @@ class GamesPage(QWidget):
]
self._libraries_ready.emit(libs)
try:
self._extra_games = launchers.scan() # Lutris / Heroic (non-Steam)
from ..core import customgames
# non-Steam: Lutris/Heroic + user-added games (SPT etc.)
self._extra_games = list(launchers.scan()) + customgames.scan()
except Exception:
self._extra_games = []
self._scanned.emit(steam.rescan())
@@ -423,6 +429,24 @@ class GamesPage(QWidget):
reccontrol.stop_background()
self._banner.hide()
def _add_custom_game(self) -> None:
    """Prompt for a game name and add it to the custom games list.

    Cancelling the dialog or entering a blank name does nothing. A successful
    add triggers `self.refresh()`; a rejected add shows an "already in your
    games" message box.
    """
    from PySide6.QtWidgets import QInputDialog
    from ..core import customgames

    text, accepted = QInputDialog.getText(
        self, "Add game", "Game name (e.g. SPT) — for titles no launcher reports:")
    name = text.strip() if accepted else ""
    if not name:
        return
    if not customgames.add(name):
        QMessageBox.information(self, "Add game", f"'{name}' is already in your games.")
        return
    self.refresh()
def _show_autocapture(self) -> None:
from ..core import wrap
+85
View File
@@ -0,0 +1,85 @@
"""Tests for user-added games (M6): add/remove/scan of titles no launcher reports (e.g. SPT)."""
import tempfile
import unittest
from pathlib import Path
from unittest import mock
from rigdoctor.core import customgames
class CustomGamesTests(unittest.TestCase):
    """Exercise `customgames` against a throwaway JSON file in a per-test temp dir."""

    def setUp(self):
        # Redirect config.CUSTOM_GAMES_FILE so no test touches the real user file.
        self._tmp = tempfile.TemporaryDirectory()
        self._file = Path(self._tmp.name) / "custom-games.json"
        self._patch = mock.patch.object(customgames.config, "CUSTOM_GAMES_FILE", self._file)
        self._patch.start()

    def tearDown(self):
        self._patch.stop()
        self._tmp.cleanup()

    def test_missing_file_scans_empty(self):
        """With no file on disk, both readers report nothing."""
        self.assertEqual(customgames.scan(), [])
        self.assertEqual(customgames.names(), [])

    def test_add_then_scan_returns_game(self):
        """An added game shows up in scan() as a 'custom' Game and is persisted."""
        self.assertTrue(customgames.add("SPT"))
        scanned = customgames.scan()
        self.assertEqual(len(scanned), 1)
        only = scanned[0]
        self.assertEqual(only.name, "SPT")
        self.assertEqual(only.launcher, "custom")
        self.assertTrue(self._file.exists())  # persisted

    def test_add_is_idempotent_case_insensitive(self):
        """Duplicate (any case) and blank names are rejected without changing the list."""
        self.assertTrue(customgames.add("SPT"))
        self.assertFalse(customgames.add("spt"))  # already present
        self.assertFalse(customgames.add(" "))  # blank
        self.assertEqual(customgames.names(), ["SPT"])

    def test_remove(self):
        """remove() matches case-insensitively and reports whether anything was dropped."""
        customgames.add("SPT")
        customgames.add("Minecraft")
        self.assertTrue(customgames.remove("spt"))  # case-insensitive
        self.assertEqual(customgames.names(), ["Minecraft"])
        self.assertFalse(customgames.remove("nope"))

    def test_scan_sorted_by_name(self):
        """scan() orders by name regardless of insertion order."""
        for title in ("Zomboid", "Apex", "SPT"):
            customgames.add(title)
        scanned_names = [game.name for game in customgames.scan()]
        self.assertEqual(scanned_names, ["Apex", "SPT", "Zomboid"])

    def test_command_and_logdir_stored_and_resolved(self):
        """An explicit command and logdir round-trip through command()/log_dir()."""
        root = Path(self._tmp.name)
        logs = root / "logs"
        logs.mkdir()
        script = root / "tarkov.sh"
        script.write_text("#!/bin/sh\n")
        self.assertTrue(customgames.add("SPT", command=str(script), logdir=str(logs)))
        self.assertEqual(customgames.command("SPT"), [str(script)])
        self.assertEqual(customgames.log_dir("SPT"), str(logs))

    def test_logdir_inferred_from_sibling_logs(self):
        """A command with a sibling logs/ dir (SPT's layout) gets its logdir auto-detected."""
        root = Path(self._tmp.name)
        script = root / "tarkov.sh"
        script.write_text("#!/bin/sh\n")
        (root / "logs").mkdir()
        self.assertTrue(customgames.add("SPT", command=str(script)))
        self.assertEqual(customgames.log_dir("SPT"), str(root / "logs"))

    def test_no_command_resolves_to_none(self):
        """Without a command/logdir (or for an unknown game) the resolvers return None."""
        customgames.add("SPT")
        self.assertIsNone(customgames.command("SPT"))
        self.assertIsNone(customgames.command("missing"))
        self.assertIsNone(customgames.log_dir("SPT"))

    def test_corrupt_file_degrades_to_empty(self):
        """Garbage in the file reads as empty, and the next add overwrites it."""
        self._file.parent.mkdir(parents=True, exist_ok=True)
        self._file.write_text("{not json")
        self.assertEqual(customgames.scan(), [])
        # and a subsequent add still works (overwrites the garbage)
        self.assertTrue(customgames.add("SPT"))
        self.assertEqual(customgames.names(), ["SPT"])
if __name__ == "__main__":
unittest.main()
+30
View File
@@ -47,6 +47,36 @@ class CollectTests(unittest.TestCase):
self.assertEqual(gamelogs.collect(), "")
class CustomGameLogTests(unittest.TestCase):
    """`gamelogs.collect(game=...)` pulling in a custom game's own log files.

    Proton/Steam sources are patched out so only the custom-game path contributes.
    """

    def _make_log_dir(self) -> Path:
        """Create a temp log dir that is removed when the test finishes."""
        tmp = tempfile.TemporaryDirectory()
        # Registered cleanup runs even if the test fails, so no temp dirs are leaked.
        self.addCleanup(tmp.cleanup)
        return Path(tmp.name)

    def test_collect_includes_custom_game_logs(self):
        tmp = self._make_log_dir()
        (tmp / "tarkov-latest.log").write_text(">>> Tarkov gone. clean exit")
        (tmp / "server-latest.log").write_text("SPT server error: mod failed to load")
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None), \
                mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)):
            out = gamelogs.collect(game="SPT")
        self.assertIn("SPT log", out)
        self.assertIn("server-latest.log", out)
        self.assertIn("mod failed to load", out)

    def test_custom_logs_skipped_when_stale(self):
        tmp = self._make_log_dir()
        old = tmp / "tarkov-latest.log"
        old.write_text("an earlier session")
        # Backdate the file an hour so it predates the `since` cutoff below.
        old_mtime = time.time() - 3600
        os.utime(old, (old_mtime, old_mtime))
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None), \
                mock.patch("rigdoctor.core.customgames.log_dir", return_value=str(tmp)):
            self.assertEqual(gamelogs.collect(since=time.time() - 60, game="SPT"), "")

    def test_no_game_means_no_custom_logs(self):
        with mock.patch.object(gamelogs, "_proton_logs", return_value=[]), \
                mock.patch.object(gamelogs, "_steam_console", return_value=None):
            self.assertEqual(gamelogs.collect(), "")  # game=None → custom lookup skipped
class SinceScopingTests(unittest.TestCase):
def test_since_filter_keeps_window_only(self):
text = (