diff --git a/CHANGELOG.md b/CHANGELOG.md index 85b4438..f5a60f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to RigDoctor are recorded here. Format follows SemVer (`MAJOR.MINOR.PATCH`, pre-1.0). `__version__` and `pyproject.toml` must match the git release tag (so the auto-updater, D18, can compare versions). +## [0.11.0] - 2026-05-22 +### Added +- **Guided diagnostic session (CLI) — the seed use case, end to end.** `rigdoctor diagnose + start --game "<name>"` runs a **focused crash-capture tagged with that game** (its own + diagnostic log, so the report is scoped to just that session), `diagnose status` shows + progress, and `diagnose finish` stops it and prints a combined report: the **capture + summary** (peak temps/power, GPU-lost events, last samples — M3) plus the **health findings** + (Xid/SMART/driver/etc. — M4). The game can be given by `--game` or `--appid` (resolved from + the Steam scan), and is recorded as a log event so it survives a crash + reboot. +- Shared orchestration lives in `core/diagnostic.py` (one callable for CLI/GUI/tray, per + ARCHITECTURE §7.1); the recorder/`record run` gained an optional `--game` tag. + ## [0.10.2] - 2026-05-22 ### Changed - When an Environment **Apply**/**Install** fails, the status now shows the **real reason** diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 73f8ced..3ab7309 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -40,8 +40,11 @@ Ubuntu + NVIDIA first; `.deb` distribution (see `DECISIONS.md`). 
- [ ] M10 desktop GUI (PySide6: dashboard, log browser, report viewer, logger controls) - [ ] M11 tray / menu-bar applet (QSystemTrayIcon: live M1 readouts + Run Diagnostic + supporting actions — D13) -- [ ] Guided diagnostic session (pick game → focused M3 capture → M4 scan → findings), - shared by tray/GUI/CLI +- [~] Guided diagnostic session (pick game → focused M3 capture → M4 scan → findings), + shared by tray/GUI/CLI — *core + CLI done* (`core/diagnostic.py`, `rigdoctor diagnose + start/status/finish`): tags a focused capture with the chosen game (own diagnostic log, + window-scoped report) and combines the capture summary with the M4 findings. *Pending:* + the GUI/tray "Run Diagnostic" button, and auto start/stop via the D12 wrapper/watcher. - [ ] Logger trigger modes: always-on + game-launch (D12 — wrapper first: `rigdoctor wrap %command%` + global Steam compat-tool; zero-config watcher (Steam RunningAppID + /proc) and GameMode hook follow) diff --git a/pyproject.toml b/pyproject.toml index 4930e12..87a50d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rigdoctor" -version = "0.10.2" +version = "0.11.0" description = "Modular hardware monitoring & crash diagnostics for Linux gamers." 
readme = "README.md" requires-python = ">=3.11" diff --git a/src/rigdoctor/__init__.py b/src/rigdoctor/__init__.py index 2a05370..03e8ee2 100644 --- a/src/rigdoctor/__init__.py +++ b/src/rigdoctor/__init__.py @@ -1,3 +1,3 @@ """RigDoctor — modular hardware monitoring & crash diagnostics for Linux gamers.""" -__version__ = "0.10.2" +__version__ = "0.11.0" diff --git a/src/rigdoctor/cli.py b/src/rigdoctor/cli.py index 98a527b..c713f8e 100644 --- a/src/rigdoctor/cli.py +++ b/src/rigdoctor/cli.py @@ -86,6 +86,7 @@ def cmd_record_run(args) -> int: max_bytes=cfg["log_max_bytes"], backups=cfg["log_backups"], status_path=config.STATUS_FILE, + game=getattr(args, "game", None), ) def _handle(_sig, _frame): @@ -345,6 +346,77 @@ def cmd_report(args) -> int: return 0 +def _resolve_game(args) -> str | None: + """Game name from --game, or looked up from --appid via the Steam scan.""" + if getattr(args, "game", None): + return args.game + if getattr(args, "appid", None): + from .core import steam + + for g in steam.scan_games(steam.selected_library_paths()): + if g.appid == str(args.appid): + return g.name + return None + return None + + +def cmd_diagnose(args) -> int: + from .core import diagnostic, reccontrol, steam + + sub = args.diagnose_cmd or "status" + + if sub == "start": + if reccontrol.running_pid(): + print("A capture is already running — finish it with: rigdoctor diagnose finish") + return 1 + game = _resolve_game(args) + if game is None and (args.game or args.appid): + print("Couldn't match that game in your selected Steam libraries.") + return 1 + if game is None: + games = steam.cached_games() or steam.scan_games(steam.selected_library_paths()) + if games: + print("Pick a game to focus on, then re-run with --game:") + for g in games: + print(f" --game {g.name!r}") + else: + print("No games detected. 
Select a library: rigdoctor games libraries --all") + return 1 + pid = diagnostic.start(game=game, interval=args.interval) + time.sleep(1.0) + if pid and reccontrol.pid_alive(pid): + print(f"Diagnostic capture started for {game!r} (pid {pid}).") + print(" Play your game. When you're done (or after a crash + reboot):") + print(" rigdoctor diagnose finish") + return 0 + print(f"Capture failed to start; see {config.SPAWN_LOG}") + return 1 + + if sub == "status": + status = diagnostic.active() + if not status: + print("No diagnostic capture is running.") + return 0 + game = status.get("game") or "—" + print(f"Capturing for {game!r}: {status.get('samples', 0)} samples" + + (" · GPU-lost seen" if status.get("gpu_lost") else "")) + return 0 + + # finish + if not reccontrol.running_pid() and not config.DIAG_LOG.exists(): + print("No diagnostic to analyze. Start one with: rigdoctor diagnose start --game ") + return 1 + print("Stopping capture and analyzing…\n") + result = diagnostic.finish(last_n=args.last) + from .render import render_health, render_summary + + if result.game: + print(f"Diagnostic — {result.game}\n") + print(render_summary(result.summary, log_path=config.DIAG_LOG)) + print("\n" + render_health(result.findings, title="Findings")) + return 0 + + def cmd_gameenv(args) -> int: from dataclasses import asdict @@ -470,6 +542,7 @@ def build_parser() -> argparse.ArgumentParser: run_p = rec_sub.add_parser("run", help="run the capture loop in the foreground (systemd-friendly)") run_p.add_argument("-n", "--interval", type=float, default=None, help="sampling interval (s)") run_p.add_argument("-o", "--out", default=None, help="log file path") + run_p.add_argument("--game", default=None, help="tag the capture with a game name (M6/diagnose)") run_p.set_defaults(func=cmd_record_run) start_p = rec_sub.add_parser("start", help="start recording in the background") @@ -519,6 +592,19 @@ def build_parser() -> argparse.ArgumentParser: env_p = sub.add_parser("gameenv", 
help="gaming environment checks (M6): flag stability/perf settings") env_p.add_argument("--json", action="store_true", help="output JSON instead of text") env_p.set_defaults(func=cmd_gameenv) + + diag_p = sub.add_parser("diagnose", help="guided diagnostic: capture while gaming, then analyze") + diag_sub = diag_p.add_subparsers(dest="diagnose_cmd") + diag_start = diag_sub.add_parser("start", help="start a focused capture for a game") + diag_start.add_argument("--game", default=None, help="game name to focus on") + diag_start.add_argument("--appid", default=None, help="Steam appid to focus on (resolved to a name)") + diag_start.add_argument("-n", "--interval", type=float, default=None, help="sampling interval (s)") + diag_start.set_defaults(func=cmd_diagnose) + diag_sub.add_parser("status", help="show the in-progress diagnostic").set_defaults(func=cmd_diagnose) + diag_finish = diag_sub.add_parser("finish", help="stop the capture and analyze it") + diag_finish.add_argument("--last", type=int, default=10, help="recent samples to show") + diag_finish.set_defaults(func=cmd_diagnose) + diag_p.set_defaults(func=cmd_diagnose, diagnose_cmd=None, last=10) return p diff --git a/src/rigdoctor/config.py b/src/rigdoctor/config.py index f0c81ea..ee1704f 100644 --- a/src/rigdoctor/config.py +++ b/src/rigdoctor/config.py @@ -23,6 +23,9 @@ CONFIG_FILE = CONFIG_DIR / "config.toml" # Crash-capture logger (M3) LOG_FILE = LOG_DIR / "capture.jsonl" +# Guided diagnostic (M6/D12): a focused capture writes here, separate from the always-on +# crash log, so its report covers only that session's window. 
+DIAG_LOG = LOG_DIR / "diagnostic.jsonl" STATUS_FILE = STATE_DIR / "recorder.json" PID_FILE = STATE_DIR / "recorder.pid" SPAWN_LOG = STATE_DIR / "recorder.out" diff --git a/src/rigdoctor/core/diagnostic.py b/src/rigdoctor/core/diagnostic.py new file mode 100644 index 0000000..c4455a8 --- /dev/null +++ b/src/rigdoctor/core/diagnostic.py @@ -0,0 +1,84 @@ +"""Guided diagnostic session (SPEC §4 / ARCHITECTURE §7.1): orchestrate M3 + M4. + +The seed use case, one flow: **pick a game** → **focused crash-capture** scoped to that +session (M3, tagged with the game) → on **finish**, **scan & analyze** (M4 health report) +over the captured window + system logs → return a prioritized result. This is not a new +module — it's a single shared callable so the CLI, GUI, and tray run the identical flow. + +The capture is **manually bracketed** (start/finish) for now; auto start/stop on game launch +(the D12 wrapper/watcher) plugs in here later without changing the result shape. +""" + +from __future__ import annotations + +import time +from dataclasses import dataclass + +from .. import config +from . import reccontrol +from .crashlog import Summary, summarize +from .health import Finding + + +@dataclass +class DiagnosticResult: + game: str | None + summary: Summary # capture window: peak temps/power, events, last samples (M3) + findings: list[Finding] # health findings: Xid/SMART/driver/etc. (M4) + + +def _clear_diag_log() -> None: + """Each diagnostic is a fresh focused capture — drop any previous session + segments.""" + base = config.DIAG_LOG + for p in [base, *base.parent.glob(base.name + ".*")]: + try: + p.unlink() + except OSError: + pass + + +def start(game: str | None = None, interval: float | None = None) -> int | None: + """Begin a focused capture, tagged with the game, into the dedicated diagnostic log. 
+ Returns the pid, or None if a capture is already running.""" + if reccontrol.running_pid(): + return None + _clear_diag_log() + return reccontrol.start_background(interval=interval, out=str(config.DIAG_LOG), game=game) + + +def is_running() -> bool: + return reccontrol.running_pid() is not None + + +def active() -> dict | None: + """Status of the in-progress session (running flag, game, samples), or None if idle.""" + if not is_running(): + return None + return reccontrol.read_status() + + +def _await_stopped(timeout: float = 6.0) -> None: + deadline = time.monotonic() + timeout + while reccontrol.running_pid() and time.monotonic() < deadline: + time.sleep(0.1) + + +def _game_from_summary(summary: Summary) -> str | None: + """Recover the focused game from the log's 'game' event (survives a crash + reboot).""" + for _ts, kind, detail in reversed(summary.events): + if kind == "game" and detail: + return detail + return None + + +def finish(last_n: int = 10, log_path=None) -> DiagnosticResult: + """Stop the capture (if running), summarize the window, and run the health report.""" + from .health import run_health_checks + + reccontrol.stop_background() + _await_stopped() + path = log_path or config.DIAG_LOG + summary = summarize(path, last_n=last_n) + game = _game_from_summary(summary) or (reccontrol.read_status() or {}).get("game") + findings = run_health_checks() + return DiagnosticResult(game=game, summary=summary, findings=findings) diff --git a/src/rigdoctor/core/reccontrol.py b/src/rigdoctor/core/reccontrol.py index ac33be9..2d62b11 100644 --- a/src/rigdoctor/core/reccontrol.py +++ b/src/rigdoctor/core/reccontrol.py @@ -38,7 +38,9 @@ def read_status() -> dict | None: return None -def start_background(interval: float | None = None, out: str | None = None) -> int | None: +def start_background( + interval: float | None = None, out: str | None = None, game: str | None = None +) -> int | None: """Spawn a detached `record run`. 
Returns the child pid, or None if already running.""" if running_pid(): return None @@ -48,6 +50,8 @@ def start_background(interval: float | None = None, out: str | None = None) -> i cmd += ["--interval", str(interval)] if out: cmd += ["--out", out] + if game: + cmd += ["--game", game] out_fh = open(config.SPAWN_LOG, "a") proc = subprocess.Popen( cmd, diff --git a/src/rigdoctor/core/recorder.py b/src/rigdoctor/core/recorder.py index d9f2187..1125753 100644 --- a/src/rigdoctor/core/recorder.py +++ b/src/rigdoctor/core/recorder.py @@ -27,12 +27,14 @@ class Recorder: backups: int = 10, status_path=None, sampler: Sampler | None = None, + game: str | None = None, ) -> None: self.interval = interval self.sampler = sampler or Sampler(available_sources()) self.writer = CrashLogWriter(log_path, max_bytes, backups) self.log_path = Path(log_path) self.status_path = Path(status_path) if status_path else None + self.game = game or None self.samples = 0 self._stop = threading.Event() self._gpu_lost = False @@ -43,6 +45,8 @@ class Recorder: def run(self) -> None: self.writer.write_event("session-start", f"interval={self.interval:g}s") + if self.game: + self.writer.write_event("game", self.game) # tag the focused-diagnostic target self._write_status(running=True) try: while not self._stop.is_set(): @@ -81,6 +85,7 @@ class Recorder: "samples": self.samples, "updated": time.time(), "gpu_lost": self._gpu_lost, + "game": self.game, } if sample is not None: data["latest"] = headline(sample) diff --git a/tests/test_diagnostic.py b/tests/test_diagnostic.py new file mode 100644 index 0000000..46ec89e --- /dev/null +++ b/tests/test_diagnostic.py @@ -0,0 +1,61 @@ +"""Tests for the guided diagnostic orchestration (M3+M4 glue).""" + +import tempfile +import time +import unittest +from pathlib import Path +from unittest import mock + +from rigdoctor.core import diagnostic +from rigdoctor.core.crashlog import CrashLogWriter, summarize +from rigdoctor.core.health import Finding +from 
rigdoctor.core.sample import Reading, Sample + + +def _write_log(path: str, game: str) -> None: + w = CrashLogWriter(path) + w.write_event("session-start", "interval=1s") + w.write_event("game", game) + for temp in (60.0, 72.0, 81.0): + w.write_sample(Sample(ts=time.time(), readings=[Reading("gpu", "temp", temp, "°C", "")])) + w.write_event("gpu-lost", "nvidia-smi query timed out") + w.close() + + +class GameRecoveryTests(unittest.TestCase): + def test_game_recovered_from_log_event(self): + with tempfile.TemporaryDirectory() as d: + log = str(Path(d) / "capture.jsonl") + _write_log(log, "Path of Exile 2") + summary = summarize(log) + self.assertEqual(diagnostic._game_from_summary(summary), "Path of Exile 2") + + def test_no_game_event_returns_none(self): + with tempfile.TemporaryDirectory() as d: + log = str(Path(d) / "capture.jsonl") + w = CrashLogWriter(log) + w.write_event("session-start") + w.close() + self.assertIsNone(diagnostic._game_from_summary(summarize(log))) + + +class FinishTests(unittest.TestCase): + def test_finish_combines_summary_and_findings(self): + with tempfile.TemporaryDirectory() as d: + log = Path(d) / "capture.jsonl" + _write_log(str(log), "Satisfactory") + fake = [Finding("warning", "GPU", "NVIDIA Xid 79 ×1", "fell off the bus")] + with mock.patch("rigdoctor.core.health.run_health_checks", return_value=fake), \ + mock.patch.object(diagnostic.reccontrol, "stop_background", return_value=False), \ + mock.patch.object(diagnostic.reccontrol, "running_pid", return_value=None): + result = diagnostic.finish(log_path=log) + self.assertEqual(result.game, "Satisfactory") + self.assertEqual(result.summary.samples, 3) + self.assertEqual(result.findings, fake) + # peak GPU temp captured in the window, GPU-lost event recorded + self.assertEqual(result.summary.maxima["gpu.temp"][0], 81.0) + self.assertTrue(any(kind == "gpu-lost" for _ts, kind, _d in result.summary.events)) + + +if __name__ == "__main__": + unittest.main()